o
    im                    @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlZd dlZd dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z) d dl*Z*d dl+Z+d dl,Z,d dl-Z-d dl*m.Z.m/Z/ d dl0m1Z1 d d	l2m3Z3m4Z4 d d
l5m6Z6m7Z7 d dl8m9Z9m:Z:m;Z;m<Z<m=Z=m>Z>m?Z? zd dl@ZAW n eBy   dZAY nw d dl8mCZCmDZDmEZE ddlFmGZG ddlHmIZImAZAmJZJmKZK ddlLmMZMmNZNmOZO ddlPmQZQ erddlRmSZSmTZT ddlUmVZVmWZW ddlXmYZY e+Ze+j[j\Z]dZ^g dZ_g dZ`i ddgddgddgddgd d!gd"d#gd$d%d&gd'd(gd)d*d+gd,d-gd.d/d0gd1d2d3gd4d5gd6d7gd8d9gd:d;d<gd=d>d?gi d@dAgdBdCgdDdEdFgdGdHgdIdJgdKdLgdMdNdOgdPdQgdRdSdTgdUdVgdWdXgdYdZd[gd\d]d^gd_d`gdadbgdcddgdedfgi dgdhgdidjgdkdlgdmdngdodpgdqdrgdsdtdugdvdwgdxdygdzd{d|gd}d~gddgdddgddgddgddgdg di ddgddgddgdddgddgdddgdddgddgddgddgddgddgddgddgddgddgddgdgdgdgddgdgdZaebdZced Zeeefegd echee G dd dZiG dd de-jjZjG dd dekZlG dd demZndeodepfddńZqdeode#eo fddǄZrdeode'd fddʄZsdeode'd ddfdd΄ZtdedefddфZude(eoef de(ekemf fddӄZvdedefddքZwen Zxdexexexel d؜de(eoef de(depf de(eoeeo f de(eoeeo f de(eoeeo f de(eeoef e:f ddfddZydexexexel d؜deode(depf de(eoeeo f de(eoeeo f de(eoeeo f de(eeoef e:f ddfddZzddexexexel ddede#eeoef  de(depf de(eoeeo f de(eoeeo f de(eoeeo f de(eeoef e:f ddfddZ{el dexexexdddde(eeoef e:f deeoef de(depf de(eoeeo f de(eoeeo f de(eoeeo f depdepddfddZ|de(eeoef e:f deeoeeoef f fddZ}de:de e#eo  de&edf fddZ~dexexexel d؜de(eeof de(depf de(eoeeo f de(eoeeo f de(eoeeo f de(eeoef e:f ddfddZel dfde(eoef deeoef depde:fddZel dfdeodeeoef depfddZde eo fddZdeode#eo fd dZ	אdɐdeodeodepde#ep fddZ	אdɐdeodepde#ep fddZd	eode&eoeof fd
dZdeodeofddZdeode#eo fddZdeodepfddZdeodeofddZdeode#eo fddZdeodeodepfddZde(eoef deeoef fddZde(eoef deeoef fddZdeodepfddZdeodefd d!Zd"e<d#e<d$e<ddfd%d&Zd'eode eo fd(d)Zddd*d'e(eoe eo f d+e#e d,epdejfd-d.Zede(eoef dee fd/d0Zedeeddf fd1d2Zdepfd3d4Zdepfd5d6Zd7edeofd8d9Zd:ed;edepfd<d=Z	אdʐd>epd?epde#eI fd@dAZdBdC Zde(eoef de$fdDdEZdFee(eoe$f  de$fdGdHZdFee(eoe$f  de$fdIdJZdFee(eoe$f  de$fdKdLZdMeeogef deeogef fdNdOZdMeeogef dPeeoef dQeodefdRdSZdTeeoe ek f deeoe ek f fdUdVZdWeeoe ek f dXeodYeodeeoe ek f fdZd[Z	dːd\ed]ed^ed_e#e de&eef f
d`daZdbedc de dc fdddeZdbedc de dc fdfdgZdhdi Zdjeeoeg ef f deeo defdkdlZdmedneeoeegef f deeo ddfdodpZdjeeoeg ef f deeo deeoef fdqdrZdseeoef dneeoeegef f deeo deeoef fdtduZde(eoef dveeoeegdf f deeo defdwdxZde(eoef dyeeoeegdf f deeo defdzd{Zdeod|e(eoef defd}d~ZdeodeofddZdeodeofddZdeeo deode&e eo e ep f fddZde(eeoef e:f de:fddZdeeoef deeoekf fddZddd7eeoekf depdeeoef fddZde:deofddZde:deodeddfddZg fdddeeoef de eo depdee&e eo ef  fddZdede eo fddZel fde eeoe#e f  deeoe#e f deeoe#e f fddZG dd dZde>fddZdd ZdedepfddZdeodepfddZded dddeodeeode d ege"f de!eoef ded fddZĐdd ZŐdd ZƐdd Zǐdd ZdefddZdeeoe eo f fddZʐdd Zːd̐dedeodepfdĐdńZ̐d͐d]edeodepdefdǐdȄZdS (      N)defaultdict)contextmanager)Path)
ModuleType)TYPE_CHECKINGAnyCallableDict	GeneratorIterableIteratorListMappingNoReturnOptionalPatternSetTupleTypeUnioncast)RegistryRegistryError)Requirement)InvalidSpecifierSpecifierSet)InvalidVersionVersion)AdamConfigConfigValidationErrorModelNumpyOps	Optimizerget_current_ops)compoundingdecayingfix_random_seed   )about)
CudaStreamcupyimportlib_metadata
is_windows)OLD_MODEL_SHORTCUTSErrorsWarningsORTH)LanguagePipeCallable)DocSpan)Vocabi)csdadeelengrcidlbmkptrusrtath)	paths	variablessystemnlp
componentscorporatrainingpretraining
initializeafaframamhararaazazebgbulbnbenbobodtibcacatr8   cesczer9   danr:   deugerr;   ellgrer<   engesspaetesteueusbaqfafasperfifinfofaofrfrafregaglegdglagugujhehebiwhihinhrhrvscrhuhunhyhyer>   indinisisliceititajajpnknkankokorkykirlalatr?   ltzlglugltlitlvlavr@   mkdmacmlmalmrmarmsmsamaynbnobnenepnlnlddutnnnnoplpolrA   porro)ronrommomolrB   russasansisinskslksloslslvsqsqialbrC   srpsccsvswerD   tamtetelrE   thatitirtltgltntsntrturtttatukukrurdviwyorzhochimul)urviyozhxxspacyz)[%(asctime)s] [%(levelname)s] %(message)sc                   @   s   e Zd ZdZdS )ENV_VARSSPACY_CONFIG_OVERRIDESN)__name__
__module____qualname__CONFIG_OVERRIDES r   r   >/home/ubuntu/.local/lib/python3.10/site-packages/spacy/util.pyr      s    r   c                   @   s  e Zd ZejddddZejddddZejddddZejddddZejddddZ	ejdd	ddZ
ejdd
ddZejddddZejddddZejddddZejddddZejddddZejddddZejddddZejddddZeddZejddddZejddddZed$ddZedee fddZedededefddZedededeeee ee!f  f fd d!Z"ededede#fd"d#Z$dS )%registryr   	languagesT)entry_pointsarchitectures
tokenizerslemmatizerslookupsdisplacy_colorsmisc	callbacksbatchersreaders
augmentersloggersscorersvectors	factoriesinternal_factoriesmodelsclireturnNc                 C   s"   ddl m}m} |s|  dS dS )z?Ensure the registry is populated with all necessary components.r(   )REGISTRY_POPULATEDpopulate_registryN)registrationsr  r  )clsr  r  r   r   r   ensure_populated   s   
zregistry.ensure_populatedc                 C   sF   |    g }t| D ]\}}|dst|tr|| qt|S )zList all available registries._)r  inspect
getmembers
startswith
isinstancer   appendsorted)r
  namesnamevaluer   r   r   get_registry_names   s   
zregistry.get_registry_namesregistry_name	func_namec                 C      |    t| |sd|  pd}ttjj||dt| |}z|	|}W |S  tyg   |
drN|dd}z|	|W  Y S  tjyM   Y nw dt|  pZd}ttjj|||ddw )z,Get a registered function from the registry., noner  	availablespacy.spacy-legacy.r  reg_namer  N)r  hasattrjoinr  r   r/   E892formatgetattrgetr  replace	cataloguer  get_allkeysE893)r
  r  r  r  regfunclegacy_namer  r   r   r   r'     s2   


zregistry.getc                 C   r  )au  Find information about a registered function, including the
        module and path to the file it's defined in, the line number and the
        docstring, if available.

        registry_name (str): Name of the catalogue registry.
        func_name (str): Name of the registered function.
        RETURNS (Dict[str, Optional[Union[str, int]]]): The function info.
        r  r  r  r  r  r   N)r  r"  r#  r  r   r/   r$  r%  r&  findr  r(  r)  r  r*  r+  r,  )r
  r  r  r  r-  	func_infor/  r  r   r   r   r0     s2   


zregistry.findc                 C   sN   |    t| |sdS t| |}|dr#|dd}||v p"||v S ||v S )z4Check whether a function is available in a registry.Fr  r  )r  r"  r&  r  r(  )r
  r  r  r-  r/  r   r   r   has#  s   


zregistry.has)r  N)%r   r   r   r)  creater   r   r   r   r   r   r   r   r   r   r   r   r   r  _entry_point_factoriesr  r  r  classmethodr  r   strr  r   r'  r	   r   r   intr0  boolr2  r   r   r   r   r      sF    	#r   c                       sL   e Zd ZdZejddeddf fddZdd	 Zdd
dZ	dd Z
  ZS )SimpleFrozenDictzSimplified implementation of a frozen dict, mainly used as default
    function or method argument (for arguments that should default to empty
    dictionary). Will raise an error if user or spaCy attempts to add to dict.
    errorr;  r  Nc                   s   t  j|i | || _dS )zInitialize the frozen dict. Can be initialized with pre-defined
        values.

        error (str): The error message when user tries to assign to dict.
        N)super__init__r;  )selfr;  argskwargs	__class__r   r   r=  6  s   
zSimpleFrozenDict.__init__c                 C   
   t | jNNotImplementedErrorr;  )r>  keyr  r   r   r   __setitem__?     
zSimpleFrozenDict.__setitem__c                 C   rC  rD  rE  )r>  rG  defaultr   r   r   popB  rI  zSimpleFrozenDict.popc                 C   rC  rD  rE  )r>  otherr   r   r   updateE  rI  zSimpleFrozenDict.updaterD  )r   r   r   __doc__r/   E095r6  r=  rH  rK  rM  __classcell__r   r   rA  r   r9  0  s    	
r9  c                       sr   e Zd ZdZejddeddf fddZdd	 Zd
d Z	dd Z
dd Zdd Zdd Zdd Zdd Z  ZS )SimpleFrozenLista  Wrapper class around a list that lets us raise custom errors if certain
    attributes/methods are accessed. Mostly used for properties like
    Language.pipeline that return an immutable list (and that we don't want to
    convert to a tuple to not break too much backwards compatibility). If a user
    accidentally calls nlp.pipeline.append(), we can raise a more helpful error.
    r:  r;  r  Nc                   s   || _ t j|  dS )zpInitialize the frozen list.

        error (str): The error message when user tries to mutate the list.
        N)r;  r<  r=  )r>  r;  r?  rA  r   r   r=  Q  s   zSimpleFrozenList.__init__c                 O   rC  rD  rE  r>  r?  r@  r   r   r   r  Y  rI  zSimpleFrozenList.appendc                 O   rC  rD  rE  rR  r   r   r   clear\  rI  zSimpleFrozenList.clearc                 O   rC  rD  rE  rR  r   r   r   extend_  rI  zSimpleFrozenList.extendc                 O   rC  rD  rE  rR  r   r   r   insertb  rI  zSimpleFrozenList.insertc                 O   rC  rD  rE  rR  r   r   r   rK  e  rI  zSimpleFrozenList.popc                 O   rC  rD  rE  rR  r   r   r   removeh  rI  zSimpleFrozenList.removec                 O   rC  rD  rE  rR  r   r   r   reversek  rI  zSimpleFrozenList.reversec                 O   rC  rD  rE  rR  r   r   r   sortn  rI  zSimpleFrozenList.sort)r   r   r   rN  r/   E927r6  r=  r  rS  rT  rU  rK  rV  rW  rX  rP  r   r   rA  r   rQ  I  s    rQ  langr  c                 C   s
   | t jv S )a  Check whether a Language class is already loaded. Language classes are
    loaded lazily, to avoid expensive setup code associated with the language
    data.

    lang (str): Two-letter language code, e.g. 'en'.
    RETURNS (bool): Whether a Language class has been loaded.
    )r   r   )rZ  r   r   r   lang_class_is_loadedr  s   
r[  c                 C   s.   ddl }t D ]\}}| |v r|  S qdS )a  
    Given a two-letter ISO 639-1 or three-letter ISO 639-3 language code,
    find a supported spaCy language.

    Returns the language code if a matching language is available, or None
    if there is no matching language.

    >>> find_matching_language('fra')  # ISO 639-3 code for French
    'fr'
    >>> find_matching_language('fre')  # ISO 639-2/B code for French
    'fr'
    >>> find_matching_language('iw')  # Obsolete ISO 639-1 code for Hebrew
    'he'
    >>> find_matching_language('mo')  # Deprecated code for Moldavian
    'ro'
    >>> find_matching_language('scc')  # Deprecated ISO 639-2/B code for Serbian
    'sr'
    >>> find_matching_language('zxx')
    None
    r   N)
spacy.langLANG_ALIASESitems)rZ  r   	lang_codealiasesr   r   r   find_matching_language}  s   ra  r3   c              
   C   s   | t jv rt j| S ztd|  d}W n/ tyE } z#t| }|r0|} td|  d}nttjj	| |d|W Y d}~nd}~ww t
| t||jd  t j| S )zImport and load a Language class.

    lang (str): Two-letter ISO 639-1 or three-letter ISO 639-3 language code, such as 'en' and 'eng'.
    RETURNS (Language): Language class.
    z.lang.r   )rZ  errNr   )r   r   r'  	importlibimport_moduleImportErrorra  r/   E048r%  set_lang_classr&  __all__)rZ  modulerb  matchr   r   r   get_lang_class  s   

rk  r  r
  c                 C   s   t jj| |d dS )zSet a custom Language class name that can be loaded via get_lang_class.

    name (str): Name of Language class.
    cls (Language): Language class.
    )r.  N)r   r   register)r  r
  r   r   r   rg       rg  pathc                 C   s   t | tr	t| S | S )zEnsure string is converted to a Path.

    path (Any): Anything. If string, it's converted to Path.
    RETURNS: Path or original argument.
    )r  r6  r   rn  r   r   r   ensure_path  s   
rp  c                 C   sN   t | } |  rt| S | | jd } |  rt| S ttj	j
| d)zLoad JSON language data using the given path as a base. If the provided
    path isn't present, will attempt to load a gzipped version before giving up.

    path (str / Path): The data to load.
    RETURNS: The loaded data.
    z.gzro  )rp  existssrsly	read_jsonwith_suffixsuffixread_gzip_json
ValueErrorr/   E160r%  ro  r   r   r   load_language_data  s   

ry  ri  c                 C   s@   t | dsttjjt| dtttj	t
j| j j}|jS )zpGet the path of a Python module.

    module (ModuleType): The Python module.
    RETURNS (Path): The path.
    r   )ri  )r"  rw  r/   E169r%  reprr   r   osPathLikesysmodulesr   __file__parent)ri  	file_pathr   r   r   get_module_path  s   
r  Tvocabdisableenableexcludeconfigr  r7   r  r  r  r  c                C   s   |||||d}t | tr8| drt| dd S t| r't| fi |S t|  r7t	t| fi |S nt
| drEt	| fi |S | tv rUttjj| t|  dttjj| d)a  Load a model from a package or data path.

    name (str): Package name or model path.
    vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
        a new Vocab object will be created.
    disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable.
    enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All others will be disabled.
    exclude (Union[str, Iterable[str]]):  Name(s) of pipeline component(s) to exclude.
    config (Dict[str, Any] / Config): Config overrides as nested dict or dict
        keyed by section values in dot notation.
    RETURNS (Language): The loaded nlp object.
    r  zblank: rq  )r  fullr  )r  r6  r  rk  r(  
is_packageload_model_from_packager   rq  load_model_from_pathr"  r.   IOErrorr/   E941r%  E050)r  r  r  r  r  r  r@  r   r   r   
load_model  s&   


r  c                C   s   t | }|j|||||dS )a  Load a model from an installed package.

    name (str): The package name.
    vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
        a new Vocab object will be created.
    disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled
        pipes will be loaded but they won't be run unless you explicitly
        enable them by calling nlp.enable_pipe.
    enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
        pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
    exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded
        components won't be loaded.
    config (Dict[str, Any] / Config): Config overrides as nested dict or dict
        keyed by section values in dot notation.
    RETURNS (Language): The loaded nlp object.
    r  )rc  rd  load)r  r  r  r  r  r  r
  r   r   r   r    s   
r  )metar  r  r  r  r  
model_pathr  c          
      C   sj   |   sttjj| d|st| }| d }t|dd}t||d}t||||||d}	|	j	| ||dS )a  Load a model from a data directory path. Creates Language class with
    pipeline from config.cfg and then calls from_disk() with path.

    model_path (Path): Model path.
    meta (Dict[str, Any]): Optional model meta.
    vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
        a new Vocab object will be created.
    disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled
        pipes will be loaded but they won't be run unless you explicitly
        enable them by calling nlp.enable_pipe.
    enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
        pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
    exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded
        components won't be loaded.
    config (Dict[str, Any] / Config): Config overrides as nested dict or dict
        keyed by section values in dot notation.
    RETURNS (Language): The loaded nlp object.
    ro  z
config.cfgTfor_overrides)	overrides)r  r  r  r  r  )r  r  )
rq  r  r/   E052r%  get_model_metadict_to_dotload_configload_model_from_config	from_disk)
r  r  r  r  r  r  r  config_pathr  rI   r   r   r   r  3  s    r  F)r  r  r  r  r  	auto_fillvalidater  r  c             
   C   sr   d| vrt tjj| d| d }d|vs|d du r$t tjj|dt|d }	|	j| |||||||d}
|
S )a%  Create an nlp object from a config. Expects the full config file including
    a section "nlp" containing the settings for the nlp object.

    name (str): Package name or model path.
    meta (Dict[str, Any]): Optional model meta.
    vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
        a new Vocab object will be created.
    disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled
        pipes will be loaded but they won't be run unless you explicitly
        enable them by calling nlp.enable_pipe.
    enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
        pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
    exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded
        components won't be loaded.
    auto_fill (bool): Whether to auto-fill config with missing defaults.
    validate (bool): Whether to show config validation errors.
    RETURNS (Language): The loaded nlp object.
    rI   r  rZ  N)r  r  r  r  r  r  r  )rw  r/   E985r%  E993rk  from_config)r  r  r  r  r  r  r  r  
nlp_configlang_clsrI   r   r   r   r  a  s"   
r  c                 C   s   dd |  di  D S )zRETURNS (List[str]): All sourced components in the original config,
    e.g. {"source": "en_core_web_sm"}. If the config contains a key
    "factory", we assume it refers to a component factory.
    c                 S   s&   i | ]\}}d |vrd|v r||qS )factorysourcer   ).0r  cfgr   r   r   
<dictcomp>  s
    z*get_sourced_components.<locals>.<dictcomp>rJ   )r'  r^  r  r   r   r   get_sourced_components  s   r  	dot_names.c           	   	   C   s   i }g }g }|D ]X}|du r| | q|dd }||vr=t| | r2td| | id }nt| | }|||< z
| t|| W q ty`   d| }| |d|d Y qw |rit| |dt|S )a:  Resolve one or more "dot notation" names, e.g. corpora.train.
    The paths could point anywhere into the config, so we don't know which
    top-level section we'll be looking within.

    We resolve the whole top-level section, although we could resolve less --
    we could find the lowest part of the tree.
    N.r   r  znot a valid section reference: )locmsg)r  errors)	r  splitr   
is_promiseresolvedot_to_objectKeyErrorr    tuple)	r  r  resolvedoutputr  r  sectionresultr  r   r   r   resolve_dot_names  s*   
r  	init_filec          
   	   C   sj   t | j}t|}|d  d|d  d|d  }|| }	| s*ttjj|	dt|	||||||dS )a  Helper function to use in the `load()` method of a model package's
    __init__.py.

    vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
        a new Vocab object will be created.
    disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled
        pipes will be loaded but they won't be run unless you explicitly
        enable them by calling nlp.enable_pipe.
    enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
        pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
    exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded
        components won't be loaded.
    config (Dict[str, Any] / Config): Config overrides as nested dict or dict
        keyed by section values in dot notation.
    RETURNS (Language): The loaded nlp object.
    rZ  r  r  -versionro  )r  r  r  r  r  r  )	r   r  r  rq  r  r/   r  r%  r  )
r  r  r  r  r  r  r  r  data_dir	data_pathr   r   r   load_model_from_init_py  s   
 r  r  interpolatec                 C   sd   t | }ttd}t|dkr|jtj ||dS |r | s*t	t
jj|dd|j|||dS )a  Load a config file. Takes care of path validation and section order.

    path (Union[str, Path]): Path to the config file or "-" to read from stdin.
    overrides: (Dict[str, Any]): Config overrides as nested dict or
        dict keyed by section values in dot notation.
    interpolate (bool): Whether to interpolate and resolve variables.
    RETURNS (Config): The loaded config.
    section_orderr  r  r  zconfig filern  r  )rp  r   CONFIG_SECTION_ORDERr6  from_strr~  stdinreadis_filer  r/   E053r%  r  )rn  r  r  r  r  r   r   r   r    s   
r  textc                 C   s   t tdj| ||dS )zLoad a full config from a string. Wrapper around Thinc's Config.from_str.

    text (str): The string config to load.
    interpolate (bool): Whether to interpolate and resolve variables.
    RETURNS (Config): The loaded config.
    r  r  )r   r  r  )r  r  r  r   r   r   load_config_from_str  s   
	r  c                   C   s   t tj  S )z~List all model packages currently installed in the environment.

    RETURNS (List[str]): The string names of the models.
    )listr   r  r*  r+  r   r   r   r   get_installed_models     r  c                 C   s$   zt | W S  t jy   Y dS w )zGet the version of an installed package. Typically used to get model
    package versions.

    name (str): The name of the installed Python package.
    RETURNS (str / None): The version or None if package not installed.
    N)r,   r  PackageNotFoundErrorr  r   r   r   get_package_version!  s
   r  r  
constraintprereleasesc              	   C   sR   |d   rd| }z
t|}t| } W n ttfy!   Y dS w ||_| |v S )a  Check if a version (e.g. "2.0.0") is compatible given a version
    constraint (e.g. ">=1.9.0,<2.2.1"). If the constraint is a specific version,
    it's interpreted as =={version}.

    version (str): The version to check.
    constraint (str): The constraint string.
    prereleases (bool): Whether to allow prereleases. If set to False,
        prerelease versions will be considered incompatible.
    RETURNS (bool / None): Whether the version is compatible, or None if the
        version or constraint are invalid.
    r   ==N)isdigitr   r   r   r   r  )r  r  r  specr   r   r   is_compatible_version.  s   
r  c                 C   s   | d   rdS zt| }W n
 ty   Y d S w ||_dd |D }t|dkr2|d jdv r2dS tdd	 |D r=dS td
d	 |D }tdd	 |D }|rU|rUdS dS )Nr   Fc                 S   s   g | ]}|qS r   r   r  spr   r   r   
<listcomp>S  s    z,is_unconstrained_version.<locals>.<listcomp>r(   >>=Tc                 s       | ]}|j d v V  qdS )r  Noperatorr  r   r   r   	<genexpr>X      z+is_unconstrained_version.<locals>.<genexpr>c                 s   r  ))<z<=Nr  r  r   r   r   r  Z  r  c                 s   r  )r  Nr  r  r   r   r   r  [  r  )r  r   r   r  lenr  any)r  r  r  specs	has_upper	has_lowerr   r   r   is_unconstrained_versionH  s$   r  requirementc                 C   s   t | }|jt|jfS )z@Split a requirement like spacy>=1.2.3 into ("spacy", ">=1.2.3").)r   r  r6  	specifier)r  reqr   r   r   split_requirementc  s   r  c                 C   s.   t | j}d|  d|d  d|d d  dS )z_Generate a version range like >=1.2.3,<1.3.0 based on a given version
    (e.g. of spaCy).
    r  z,<r   r  r(   z.0)r   release)r  r  r   r   r   get_minor_version_rangei  s   
$r  c                 C   sD   zt | }|D ]}|jdv r|j  W S qW dS  ty!   Y dS w )z>From a version range like >=1.2.3,<1.3.0 return the lower pin.)r  r  z~=N)r   r  r  	Exception)r  specsetr  r   r   r   get_model_lower_versionq  s   
r  c                 C   
   t | jS )zCheck whether a version is a prerelease version.

    version (str): The version, e.g. "3.0.0.dev1".
    RETURNS (bool): Whether the version is a prerelease version.
    )r   is_prereleaser  r   r   r   is_prerelease_version}     
r  c                 C   r  )zGenerate the base version without any prerelease identifiers.

    version (str): The version, e.g. "3.0.0.dev1".
    RETURNS (str): The base version, e.g. "3.0.0".
    )r   base_versionr  r   r   r   get_base_version  r  r  c              	   C   s8   zt | }W n ttfy   Y dS w |j d|j S )zGet the major + minor version (without patch or prerelease identifiers).

    version (str): The version.
    RETURNS (str): The major + minor version or None if version is invalid.
    Nr  )r   	TypeErrorr   majorminor)r  vr   r   r   get_minor_version  s   r  	version_a	version_bc                 C   s(   t | }t |}|duo|duo||kS )aN  Compare two versions and check if they match in major and minor, without
    patch or prerelease identifiers. Used internally for compatibility checks
    that should be insensitive to patch releases.

    version_a (str): The first version
    version_b (str): The second version.
    RETURNS (bool): Whether the versions match.
    N)r  )r  r  abr   r   r   is_minor_version_match  s   	r  c                 C   s^  t | } | j sttjj| jd|  r|  s&ttjj| jddt	
| }dD ]}||vs7|| s@ttjj|dq-d|v rttj|d st|d }t|}|dur`d| }nd	|v rkd
|d	  }nd}tjj|d  d|d  |d |tjd}t| t|d rtjj|d  d|d  |d |d ttjd}t| |S )zLoad a model meta.json from a path and validate its contents.

    path (Union[str, Path]): Path to meta.json.
    RETURNS (Dict[str, Any]): The loaded meta.
    ro  	meta.jsonr  )rZ  r  r  )settingspacy_versionNr  spacy_git_versionzgit commit zversion unknownrZ  r  r  r  )modelmodel_versionr  current)r  r  r  example)rp  r  rq  r  r/   r  r%  r  r  rr  rs  rw  E054r  r)   __version__r  r  r0   W095warningswarnr  W094r  )rn  r  r
  lower_versionwarn_msgr   r   r   	load_meta  sF   




r  c                 C   s   t | }t|d S )zGet model meta.json from a directory path and validate its contents.

    path (str / Path): Path to model directory.
    RETURNS (Dict[str, Any]): The model's meta data.
    r	  )rp  r  )rn  r  r   r   r   r    s   r  c                 C   s   zt |  W dS    Y dS )zCheck if string maps to a package installed via pip.

    name (str): Name of package.
    RETURNS (bool): True if installed package, False if not.
    TF)r,   distributionr  r   r   r   r    s
   
r  c                 C   s&   t | }tttttjf |jj	S )zxGet the path to an installed package.

    name (str): Package name.
    RETURNS (Path): Path to installed package.
    )
rc  rd  r   r   r   r6  r|  r}  r  r  )r  pkgr   r   r   get_package_path  s   
r  r  targetreplacementc                 C   sb   |   D ]}||jv r||j|j|< q|   D ]}|jD ]}|||u r-||| qqdS )zReplace a node within a model with a new one, updating refs.

    model (Model): The parent model.
    target (Model): The target node.
    replacement (Model): The node to replace the target with.
    N)walklayersindex	ref_namesmaybe_get_refset_ref)r  r  r  noderef_namer   r   r   replace_model_node  s   

r'  commandc                 C   s   t j| t dS )zSplit a string command using shlex. Handles platform compatibility.
    command (str) : The command to split
    RETURNS (List[str]): The split command.
    )posix)shlexr  r-   )r(  r   r   r   split_command
  s   r+  )r  capturer  r,  c             	   C   s   t | trt| }| }n| }d| }ztj|tj |dd|r#tj	nd|r)tj
ndd}W n tyB   ttjj||d ddw |jdkrs|rsd| d	}|d
|j 7 }|jdurf|d7 }||j7 }t|}||_||_||jdkr~t|j |S )a  Run a command on the command line as a subprocess. If the subprocess
    returns a non-zero exit code, a system exit is performed.
    command (str / List[str]): The command. If provided as a string, the
        string will be split using shlex.split.
    stdin (Optional[Any]): stdin to read from or None.
    capture (bool): Whether to capture the output and errors. If False,
        the stdout and stderr will not be redirected, and if there's an error,
        sys.exit will be called with the return code. You should use capture=False
        when you want to turn over execution to the command, and capture=True
        when you want to run the command more like a function.
    RETURNS (Optional[CompletedProcess]): The process object.
     utf8FN)envinputencodingcheckstdoutstderrr   )str_commandtoolzError running command:

z

zSubprocess exited with status z$

Process log (stdout and stderr):

)r  r6  r+  r#  
subprocessrunr|  environcopyPIPESTDOUTFileNotFoundErrorr/   E970r%  
returncoder3  SubprocessErrorretr(  r~  exit)r(  r  r,  cmd_listcmd_strrA  messager;  r   r   r   run_command  sF   


	



rF  c              
   c   sP    t  }t |  }tt| z|V  W tt| dS tt| w )aU  Change current working directory and returns to previous on exit.
    path (str / Path): The directory to navigate to.
    YIELDS (Path): The absolute path to the current working directory. This
        should be used if the block needs to perform actions within the working
        directory, to prevent mismatches with relative paths.
    N)r   cwdr  r|  chdirr6  )rn  prev_cwdr  r   r   r   working_dirI  s   "rJ  c               
   c   s    t t } | V  dd }ztjdkr tjt| |d W dS tjt| |d W dS  tyI } zt	
tjj| |d W Y d}~dS d}~ww )zExecute a block in a temporary directory and remove the directory and
    its contents at the end of the with block.
    YIELDS (Path): The path of the temp directory.
    c                 S   s   t |tj | | d S rD  )r|  chmodstatS_IWRITE)rmfuncrn  exr   r   r   force_removee  s   z"make_tempdir.<locals>.force_remove)      )onexc)onerror)dirr  N)r   tempfilemkdtempr~  version_infoshutilrmtreer6  PermissionErrorr  r  r0   W091r%  )drP  er   r   r   make_tempdirZ  s   
$r_  c                  C   sd   zt  jjdkrW dS t  jjdkrW dS W n	 ty   Y nw zddl} W dS  ty1   Y dS w )zCheck if user is running spaCy from a Jupyter or Colab notebook by
    detecting the IPython kernel. Mainly used for the displaCy visualizer.
    RETURNS (bool): True if in Jupyter/Colab, False if not.
    ZMQInteractiveShellTzgoogle.colab._shellr   NF)get_ipythonrB  r   r   	NameErrorgoogle.colabre  )googler   r   r   is_in_jupyterr  s    re  c                   C   s   t tdp	t tdS )zCheck if user is running spaCy from an interactive Python
    shell. Will return True in Jupyter notebooks too.
    RETURNS (bool): True if in interactive mode, False if not.
    ps1ps2)r"  r~  r   r   r   r   is_in_interactive  rm  rh  objc                 C   sP   t | dr| jdur| jS t | dr| jS t | dr$t | jdr$| jjS t| S )zGet a human-readable name of a Python object, e.g. a pipeline component.

    obj (Any): The Python object, typically a function or class.
    RETURNS (str): A human-readable name.
    r  Nr   rB  )r"  r  r   rB  r{  )ri  r   r   r   get_object_name  s   
rj  func1func2c                 C   sl   t | rt |s
dS t| drt|dsdS | j|jk}t| t|k}t| t|k}|o5|o5|S )a  Approximately decide whether two functions are the same, even if their
    identity is different (e.g. after they have been live reloaded). Mostly
    used in the @Language.component and @Language.factory decorators to decide
    whether to raise if a factory already exists. Allows decorator to run
    multiple times with the same function.

    func1 (Callable): The first function.
    func2 (Callable): The second function.
    RETURNS (bool): Whether it's the same function (most likely).
    Fr   )callabler"  r   r  getfilegetsourcelines)rk  rl  	same_name	same_file	same_coder   r   r   is_same_func  s   rs  requirenon_blockingc                 C   s*   t  }td u r	d S t|trd S t|dS )N)ru  )r$   r*   r  r"   )rt  ru  opsr   r   r   get_cuda_stream  s   

rw  c                 C   s2   t d u r|S t j|jd|jd}|j|| d |S )NC)orderdtype)stream)r+   ndarrayshaperz  set)r{  numpy_arrayarrayr   r   r   	get_async  s
   r  c                 C   s`   t | } | jdd}| d}W d    n1 sw   Y  ddd |D }t|S )Nr.  )r1  
|c                 S   s"   g | ]}|  rd t| qS ^)stripreescaper  piecer   r   r   r    s   " zread_regex.<locals>.<listcomp>)rp  openr  r  r#  r  compile)rn  file_entries
expressionr   r   r   
read_regex  s   
r  r  c                 C      d dd | D }t|S )a  Compile a sequence of prefix rules into a regex object.

    entries (Iterable[Union[str, Pattern]]): The prefix rules, e.g.
        spacy.lang.punctuation.TOKENIZER_PREFIXES.
    RETURNS (Pattern): The regex object. to be used for Tokenizer.prefix_search.
    r  c                 S   s   g | ]
}|  rd | qS r  r  r  r   r   r   r        z(compile_prefix_regex.<locals>.<listcomp>r#  r  r  r  r  r   r   r   compile_prefix_regex     
r  c                 C   r  )a  Compile a sequence of suffix rules into a regex object.

    entries (Iterable[Union[str, Pattern]]): The suffix rules, e.g.
        spacy.lang.punctuation.TOKENIZER_SUFFIXES.
    RETURNS (Pattern): The regex object. to be used for Tokenizer.suffix_search.
    r  c                 S   s   g | ]
}|  r|d  qS )$r  r  r   r   r   r    r  z(compile_suffix_regex.<locals>.<listcomp>r  r  r   r   r   compile_suffix_regex  r  r  c                 C   r  )a  Compile a sequence of infix rules into a regex object.

    entries (Iterable[Union[str, Pattern]]): The infix rules, e.g.
        spacy.lang.punctuation.TOKENIZER_INFIXES.
    RETURNS (regex object): The regex object. to be used for Tokenizer.infix_finditer.
    r  c                 S   s   g | ]}|  r|qS r   r  r  r   r   r   r        z'compile_infix_regex.<locals>.<listcomp>r  r  r   r   r   compile_infix_regex  r  r  default_funcc                 G   s   t t| |S )aQ  Extend an attribute function with special cases. If a word is in the
    lookups, the value is returned. Otherwise the previous function is used.

    default_func (callable): The default function to execute.
    *lookups (dict): Lookup dictionary mapping string to attribute value.
    RETURNS (callable): Lexical attribute getter.
    )	functoolspartial_get_attr_unless_lookup)r  r   r   r   r   add_lookups  s   
r  r   stringc                 C   s&   |D ]}||v r||   S q| |S rD  r   )r  r   r  lookupr   r   r   r    s
   r  base_exceptionsc                 G   s   t | }|D ];}| D ]/\}}tdd |D s#ttjj||dddd |D }||kr;ttjj||dq|	| qt
|dd}|S )a'  Update and validate tokenizer exceptions. Will overwrite exceptions.

    base_exceptions (Dict[str, List[dict]]): Base exceptions.
    *addition_dicts (Dict[str, List[dict]]): Exceptions to add to the base dict, in order.
    RETURNS (Dict[str, List[dict]]): Combined tokenizer exceptions.
    c                 s   s    | ]
}t |t tV  qd S rD  )r  r2   r6  r  attrr   r   r   r    s    zupdate_exc.<locals>.<genexpr>)rG  orthsr  c                 s   s    | ]}|t  V  qd S rD  r1   r  r   r   r   r    s    'u   ’)dictr^  allrw  r/   E055r%  r#  E056rM  
expand_exc)r  addition_dictsexc	additionsorthtoken_attrsdescribed_orthr   r   r   
update_exc  s   	r  excssearchr(  c                    sX   dd  t | }|  D ]\}}|v r)|} fdd|D }|||< q|S )ab  Find string in tokenizer exceptions, duplicate entry and replace string.
    For example, to add additional versions with typographic apostrophes.

    excs (Dict[str, List[dict]]): Tokenizer exceptions.
    search (str): String to find and replace.
    replace (str): Replacement.
    RETURNS (Dict[str, List[dict]]): Combined tokenizer exceptions.
    c                 S   s    t | }|t |||t< |S rD  )r  r2   r(  )tokenr  r(  fixedr   r   r   
_fix_token/  s   zexpand_exc.<locals>._fix_tokenc                    s   g | ]} |qS r   r   )r  tr  r(  r  r   r   r  8  r  zexpand_exc.<locals>.<listcomp>)r  r^  r(  )r  r  r(  new_excstoken_stringtokensnew_key	new_valuer   r  r   r  #  s   r  lengthstartstopstepc                 C   s~   |d u s|dkst tj|d u rd}n|dk r|| 7 }t| td|}|d u r+| }n|dk r3|| 7 }t| t||}||fS )Nr(   r   )rw  r/   E057minmax)r  r  r  r  r   r   r   normalize_slice=  s   
r  spansr6   c                 C   st   dd }t | |dd}g }t }|D ]}|j|vr/|jd |vr/|| |t|j|j qt |dd d}|S )a  Filter a sequence of spans and remove duplicates or overlaps. Useful for
    creating named entities (where one token can only be part of one entity) or
    when merging spans with `Retokenizer.merge`. When spans overlap, the (first)
    longest span is preferred over shorter spans.

    spans (Iterable[Span]): The spans to filter.
    RETURNS (List[Span]): The filtered spans.
    c                 S   s   | j | j | j fS rD  )endr  spanr   r   r   <lambda>X  s    zfilter_spans.<locals>.<lambda>T)rG  rW  r(   c                 S   s   | j S rD  )r  r  r   r   r   r  a  s    )rG  )r  r~  r  r  r  rM  range)r  get_sort_keysorted_spansr  seen_tokensr  r   r   r   filter_spansO  s   	
r  c                  G   s   t tj|  S rD  )r  	itertoolschain)r  r   r   r   filter_chain_spanse  s   r  c                   C   s   t S rD  )r  r   r   r   r   make_first_longest_spans_filteri     r  gettersc                 C   s   t t| |S rD  )rr  msgpack_dumpsto_dict)r  r  r   r   r   to_bytesm  s   r  
bytes_datasettersc                 C   s   t t| ||S rD  )	from_dictrr  msgpack_loads)r  r  r  r   r   r   
from_bytesq  r  r  c                 C   s6   i }|   D ]\}}|dd |vr| ||< q|S Nr  r   r^  r  )r  r  
serializedrG  getterr   r   r   r  y  s   
r  r  c                 C   s<   |  D ]\}}|dd |vr|| v r|| |  q| S r  r  )r  r  r  rG  setterr   r   r   r    s
   r  writersc                 C   sL   t | } |  s|   | D ]\}}|dd |vr#|| |  q| S r  )rp  rq  mkdirr^  r  )rn  r  r  rG  writerr   r   r   to_disk  s   r  r   c                 C   s<   t | } | D ]\}}|dd |vr|| |  q| S r  )rp  r^  r  )rn  r   r  rG  readerr   r   r   r    s   r  r  c                 C   s.   t j| t|}t j|}|j| |S )zImport module from a file. Used to load models from a directory.

    name (str): Name of module to load.
    loc (str / Path): Path to the file.
    RETURNS: The loaded module.
    )rc  utilspec_from_file_locationr6  module_from_specloaderexec_module)r  r  r  ri  r   r   r   import_file  s   r  htmlc                 C   s   |   ddddS )zPerform a template-specific, rudimentary HTML minification for displaCy.
    Disclaimer: NOT a general-purpose solution, only removes indentation and
    newlines.

    html (str): Markup to minify.
    RETURNS (str): "Minified" HTML.
    z    r  r  )r  r(  )r  r   r   r   minify_html  s   r  c                 C   s4   |  dd} |  dd} |  dd} |  dd} | S )	zReplace <, >, &, " with their HTML encoded representation. Intended to
    prevent HTML errors in rendered displaCy markup.

    text (str): The original text.
    RETURNS (str): Equivalent text to be safely used within HTML.
    &z&amp;r  z&lt;r  z&gt;"z&quot;)r(  )r  r   r   r   escape_html  s
   r  wordsc              	   C   s>  d d |  d | krttjj|| dg }g }d}dd | D }|D ]\}z||d |}W n tyI   ttjj|| ddw |dkrb|||||   |d ||7 }|| |d |t|7 }|t|k r|| dkrd	|d
< |d7 }q*|t|k r|||d  |d ||fS )a  Given a list of words and a text, reconstruct the original tokens and
    return a list of words and spaces that can be used to create a Doc. This
    can help recover destructive tokenization that didn't preserve any
    whitespace information.

    words (Iterable[str]): The words.
    text (str): The original text.
    RETURNS (Tuple[List[str], List[bool]]): The words and spaces.
    r  )r  r  r   c                 S   s   g | ]}|  s|qS r   )isspace)r  wordr   r   r   r    r  z(get_words_and_spaces.<locals>.<listcomp>NFr-  Tr(   )	r#  r  rw  r/   E194r%  r!  r  r  )r  r  
text_wordstext_spacestext_pos
norm_wordsr  
word_startr   r   r   get_words_and_spaces  s6   "



r  c                 C   s2   zt |  W S  ty   ttjj| ddw )zDeep copy a Config. Will raise an error if the config contents are not
    JSON-serializable.

    config (Config): The config to copy.
    RETURNS (Config): The copied config.
    r  N)r   r:  rw  r/   E961r%  r  r   r   r   copy_config  s
   r  valuesc           	      C   sb   i }|   D ](\}}|}| d}t|D ]\}}|t|d k}|||r*|ni }qq|S )a  Convert dot notation to a dict. For example: {"token.pos": True,
    "token._.xyz": True} becomes {"token": {"pos": True, "_": {"xyz": True }}}.

    values (Dict[str, Any]): The key/value pairs to convert.
    RETURNS (Dict[str, dict]): The converted values.
    r  r(   )r^  lowerr  	enumerater  
setdefault)	r  r  rG  r  rn  partsiitemis_lastr   r   r   dot_to_dict  s   r
  r  r  c                C   s   dd t | |dD S )ae  Convert dot notation to a dict. For example: {"token": {"pos": True,
    "_": {"xyz": True }}} becomes {"token.pos": True, "token._.xyz": True}.

    obj (Dict[str, dict]): The dict to convert.
    for_overrides (bool): Whether to enable special handling for registered
        functions in overrides.
    RETURNS (Dict[str, Any]): The key/value pairs.
    c                 S   s   i | ]
\}}d  ||qS )r  )r#  )r  rG  r  r   r   r   r  "  s    
zdict_to_dot.<locals>.<dictcomp>r  )	walk_dict)ri  r  r   r   r   r    s   	
r  r  c              
   C   sN   | }| d}|D ]}z|| }W q	 ttfy$   ttjj|ddw |S )a`  Convert dot notation of a "section" to a specific part of the Config.
    e.g. "training.optimizer" would return the Optimizer object.
    Throws an error if the section is not defined in this config.

    config (Config): The config.
    section (str): The dot notation of the section in the config.
    RETURNS: The object denoted by the section
    r  r  N)r  r  r  r/   E952r%  )r  r  	componentr  r  r   r   r   r  (  s   	
r  r  c              
   C   sp   | }| d}t|D ]*\}}z|t|d kr|||< n|| }W q ttfy5   ttjj|ddw dS )zUpdate a config at a given position from a dot notation.

    config (Config): The config.
    section (str): The dot notation of the section in the config.
    value (Any): The value to set in the config.
    r  r(   r  N)r  r  r  r  r  r/   r  r%  )r  r  r  r  r  r  r  r   r   r   set_dot_to_object;  s   

r  r%  r  c                c   sd    |   D ]*\}}g ||}t|tr*|rtdd |D s*t|||dE dH  q||fV  qdS )zWalk a dict and yield the path and values of the leaves.

    for_overrides (bool): Whether to treat registered functions that start with
        @ as final values rather than dicts to traverse.
    c                 s   s    | ]}| d V  qdS )@N)r  )r  	value_keyr   r   r   r  Z  r  zwalk_dict.<locals>.<genexpr>r  N)r^  r  r  r  r  )r%  r  r  rG  r  
key_parentr   r   r   r  N  s   
r  r.  c                 C   s$   t | }ttg |j|jS )zGet a list of all named arguments of a function (regular,
    keyword-only).

    func (Callable): The function
    RETURNS (List[str]): The argument names.
    )r  getfullargspecr  r  fromkeysr?  
kwonlyargs)r.  argspecr   r   r   get_arg_namesa  s   
r  weightsc                 C   sb   dd | D }| | tdd | D }| D ]\}}|r.|dkr.t|| d||< q|S )a  Combine and normalize score weights defined by components, e.g.
    {"ents_r": 0.2, "ents_p": 0.3, "ents_f": 0.5} and {"some_other_score": 1.0}.

    weights (List[dict]): The weights defined by the components.
    overrides (Dict[str, Optional[Union[float, int]]]): Existing scores that
        should be preserved.
    RETURNS (Dict[str, float]): The combined and normalized weights.
    c                 S   s$   i | ]}|  D ]\}}||qqS r   )r^  )r  w_dictrG  r  r   r   r   r  {  s
    
z)combine_score_weights.<locals>.<dictcomp>c                 S   s   g | ]}|r|nd qS )g        r   )r  r  r   r   r   r    r  z)combine_score_weights.<locals>.<listcomp>r      )rM  sumr  r^  round)r  r  r  
weight_sumrG  r  r   r   r   combine_score_weightsl  s   
r  c                   @   sj   e Zd Zdd Zdd Zdd Zdedd fd	d
Zdee	e
f ddfddZdee	e
f dd fddZdS )DummyTokenizerc                 C   s   t rD  )rF  )r>  r  r   r   r   __call__  r  zDummyTokenizer.__call__c                 k   s    |D ]}| |V  qd S rD  r   )r>  textsr@  r  r   r   r   pipe  s   zDummyTokenizer.pipec                 K   s   dS )N    r   )r>  r@  r   r   r   r    r  zDummyTokenizer.to_bytesdatar  c                 K      | S rD  r   )r>  r#  r@  r   r   r   r    r  zDummyTokenizer.from_bytesrn  Nc                 K      d S rD  r   r>  rn  r@  r   r   r   r    r  zDummyTokenizer.to_diskc                 K   r$  rD  r   r&  r   r   r   r    r  zDummyTokenizer.from_disk)r   r   r   r  r!  r  bytesr  r   r6  r   r  r  r   r   r   r   r    s    r  c                   C   s   t  S rD  )r   r   r   r   r   create_default_optimizer  s   r(  c                 c   s^    t |trt|}n|}t| } 	 t|}tt| t|}t|dkr)dS t|V  q)zlIterate over batches of items. `size` may be an iterator,
    so that batch-size can vary on each step.
    Tr   N)	r  r7  r  repeatiternextr  islicer  )r^  sizesize_
batch_sizebatchr   r   r   	minibatch  s   

r1  c                 C   s`   d}t | |r	dS t | dr.t | dr.| jtjv r.ttj| j | jdd  }t ||S dS )a  Slightly hacky check for whether a callable is implemented in Cython.
    Can be used to implement slightly different behaviors, especially around
    inspecting and parameter annotations. Note that this will only return True
    for actual cdef functions and methods, not regular Python functions defined
    in Python modules.

    func (Callable): The callable to check.
    RETURNS (bool): Whether the callable is Cython (probably).
    __pyx_vtable__Tr   r   r  r   F)r"  r   r~  r  varsr   r  )r.  r  cls_funcr   r   r   is_cython_func  s   

 
r5  env_varc                 C   s"   t j| d}|dkrdS t|S )a  Convert the value of an environment variable to a boolean. Add special
    check for "0" (falsy) and consider everything else truthy, except unset.

    env_var (str): The name of the environment variable to check.
    RETURNS (bool): Its boolean value.
    F0)r|  r9  r'  r8  )r6  r  r   r   r   check_bool_env_var  s   r8  docsr5   procr4   default_error_handlerr@  c           	      c   s    t |dr|j| fi |E d H  d S t|}|}t |dr#| }dD ]}||v r0|| q%| D ])}z||fi |}|V  W q3 ty\ } z||||g| W Y d }~q3d }~ww d S )Nr!  get_error_handler)r/  )r"  r!  r  r<  rK  r  )	r9  r:  r  r;  r@  error_handlerargdocr^  r   r   r   _pipe  s(   
	


r@  c                 C   s   |rD  r   	proc_namer:  r9  r^  r   r   r   raise_error  r  rC  c                 C   r%  rD  r   rA  r   r   r   ignore_error  r  rD  c                  C   sJ   t  rddlm}  | jdur!ddlm} | s#ttj	 dS dS dS dS )zqWarn about require_gpu if a jupyter notebook + cupy + mismatched
    contextvars vs. thread ops are detected
    r   )CupyOpsN)contextvars_eq_thread_ops)
re  thinc.backends.cupy_opsrE  xpthinc.backendsrF  r  r  r0   W111)rE  rF  r   r   r   warn_if_jupyter_cupy  s   
rK  c                 C   sP   | j di }t|dkr$| jtv r&dt}ttj	j
||d d S d S d S )Nlexeme_normr   r  )r  langs)r   	get_tabler  rZ  LEXEME_NORM_LANGSr#  loggerdebugr0   W033r%  )r  component_namelexeme_normsrM  r   r   r   check_lexeme_norms	  s
   
rU  c                 C   s@   | du rdS | du rdS | du rdS | dkrdS | dkrdS dS )zConvert a value to the ternary 1/0/-1 int used for True/None/False in
    attributes such as SENT_START: True/1/1.0 is 1 (True), None/0/0.0 is 0
    (None), any other values are -1 (False).
    Tr(   Nr   Fr  r   )valr   r   r   to_ternary_int  s   rW  c                  C   sJ   t t} t D ]}|dpd D ]}| | |jd  qqt| S )zReturn a mapping of top-level packages to their distributions. We're
    inlining this helper from the importlib_metadata "backport" here, since
    it's not available in the builtin importlib.metadata.
    ztop_level.txtr  Name)	r   r  r,   distributions	read_textr  r  metadatar  )pkg_to_distdistr  r   r   r   packages_distributions'  s   r^  c                 C   s    t | }t|dot|d S )ztReturn True if all the elements are equal to each other
    (or if the input is an empty sequence), False otherwise.TF)r  groupbyr+  )iterablegr   r   r   	all_equal3  s   
rb  	localhostporthostc                 C   s\   t  t jt j}z z||| f W W |  dS  t jy(   Y W |  dS w |  w )zCheck if 'host:port' is in use. Return True if it is, False otherwise.

    port (int): the port to check
    host (str): the host to check (default "localhost")
    RETURNS (bool): Whether 'host:port' is in use.
    FT)socketAF_INETSOCK_STREAMbindcloser;  )rd  re  sr   r   r   _is_port_in_use:  s   
rl  auto_selectc                 C   s   t | |s| S | }|sttjj|dt ||r*|dk r*|d7 }t ||r*|dk s|dkr<t ||r<ttjj|dttj	j|| |d |S )a  Given a starting port and a host, handle finding a port.

    If `auto_select` is False, a busy port will raise an error.

    If `auto_select` is True, the next free higher port will be used.

    start (int): the port to start looking from
    host (str): the host to find a port on
    auto_select (bool): whether to automatically select a new port if the given port is busy (default False)
    RETURNS (int): The port to use.
    )rd  i  r(   )re  )re  rd  
serve_port)
rl  rw  r/   E1050r%  E1049r  r  r0   W124)r  re  rm  rd  r   r   r   find_available_portK  s   
rr  )T)FTrD  )rc  )F)r  rc  importlib.utilr  r  loggingr|  r  r*  rY  rf  rL  r7  r~  rV  r  collectionsr   
contextlibr   pathlibr   typesr   typingr   r   r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r)  numpyrr  thincr   r   packaging.requirementsr   packaging.specifiersr   r   packaging.versionr   r   	thinc.apir   r   r    r!   r"   r#   r$   cupy.randomr+   re  r%   r&   r'   r  r)   compatr*   r,   r-   r  r.   r/   r0   symbolsr2   languager3   r4   r  r5   r6   r  r7   iinfouint64r  OOV_RANKDEFAULT_OOV_PROBrO  r  r]  	getLoggerrP  StreamHandlerlogger_stream_handlersetFormatter	Formatter
addHandlerr   r   r  r9  r  rQ  r6  r8  r[  ra  rk  rg  rp  ry  r  _DEFAULT_EMPTY_PIPESr  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r'  r+  CompletedProcessrF  rJ  r_  re  rh  rj  rs  rw  r  r  r  r  r  r  r  r  r  r7  r  r  r  r  r'  r  r  r  r  r  r  r  r  r  r  r  r
  r  r  r  r  r  floatr  r  r(  r1  r5  r8  r  r@  rC  rD  rK  rU  rW  r^  rb  rl  rr  r   r   r   r   <module>   sd   L$
	
 !"#$%&'()*+,-./0123456789:;<=>?@ABCDE
N
{)	"


-

 
	
1

	

2



(


,




 		&&+
$
7$		$
$$,
	
$$0




$&)(6"


",