o
    i<                     @   sn  d dl Z d dlZd dlmZ d dlZzd dlmZ W n ey)   d dlmZ Y nw d dl	m
Z
mZmZmZmZmZmZmZ d dlmZmZ d dlmZmZmZmZ d dlmZ d dlmZ d d	lm Z  d d
l!m"Z" d dl#m$Z$m%Z% d dl&m'Z'm(Z(m)Z) d dl*m+Z+ d dl,m-Z-m.Z.m/Z/m0Z0m1Z1 ddlm2Z2m3Z3 ej4dd Z5ej67ddd Z8ej67ddd Z9ej6:dddgdd Z;ej6:ddd gd!d" Z<ej6:d#d$gd%d& Z=dd+d,Z>d-d. Z?d/d0 Z@d1d2 ZAd3d4 ZBd5d6 ZCej6:d7eed8fed9e d8fd:d;d<d=d>d?d@g	dAdB ZDej6:dCg dDdEdF ZEej6:dGg dHdIdJ ZFej6:dKd8d8dLdMd8dNd8idOifdPdQdRdSdPdTdQidUifdVdWidXdYdZdWiiifgd[d\ ZGej6:dKd8d8dLdMd8dNd8idOifdPdQdRdSdPdTdQidUifd]dZdWiidXdYdZdWiiifgd^d_ ZHd`da ZIej6:dbg dcd)gfg ddd'gfg ded)d*gfg dfd(gfg dgd(gfg dhd)d)gfg did)d)gfg djg dkfg dld)gfg dmd'gfg dnd*d*gfg dod*d*gfgdpdq ZJej6:dbg drdd*gfg dsdd'gfg dgdd(gfg dtg dufg dvdd*gfg dwg dxfgdydz ZKd{d| ZLd}d~ ZMdd ZNdd ZOdd ZPdd ZQdS )    N)Path)ValidationError)ConfigConfigValidationErrorCupyOpsMPSOpsNumpyOps	Optimizerget_current_opsset_current_ops)has_cupy_gpuhas_torch_mps_gpu)
prefer_gpurequire_cpurequire_gpuutil)__version__)English)Dutch)DEFAULT_CONFIG_PATH)PrecomputableAffine&_backprop_precomputable_affine_padding)ConfigSchemaTrainingTokenPatternTokenPatternSchema)minibatch_by_words)SimpleFrozenListdot_to_objectfind_available_portimport_fileto_ternary_int   )get_random_docmake_tempdirc                  C   s8   z	t  dk} W | S  ty   tjj dk} Y | S w )z/Determine if the tests are run as admin or not.r   )osgetuidAttributeErrorctypeswindllshell32IsUserAnAdmin)admin r,   I/home/ubuntu/.local/lib/python3.10/site-packages/spacy/tests/test_misc.pyis_admin.   s   r.   i?  c                 C   sd   | d}|d d }|dd }|dd }t |||f}||v s$J ||vs*J ||v s0J d S )Nz zero one two three four five six               )r   filter_spans)en_tokenizerdocs1s2s3resultr,   r,   r-   test_issue62079   s   r;   ir  c                   C   sH   t t gd tt t g d W d   dS 1 sw   Y  dS )z=Test that the non-empty constraint pattern field is respected)patternN)r   r   pytestraisesr   r,   r,   r,   r-   test_issue6258H   s   "r?   textzhello/worldzhello worldc                 C   s   t | }t|tsJ d S N)r   ensure_path
isinstancer   )r@   pathr,   r,   r-   test_util_ensure_path_succeedsS   s   
rE   zpackage,result)numpyT)sfkodskfosdkfpsdpofkspdofFc                 C   s   t | |u s	J dS )zHTest that an installed package via pip is recognised by util.is_package.N)r   
is_package)packager:   r,   r,   r-   test_util_is_packageY   s   rJ   rI   thincc                 C   s   t | }t|tsJ dS )z7Test that a Path object is returned for a package name.N)r   get_package_pathrC   r   )rI   rD   r,   r,   r-   test_util_get_package_patha   s   
rM   r/   r2   r0      c                 C   sV  t | |||d }|dj|| ||fksJ |jd|f}||\}}|j|jd d || |fks7J |jd| |f}|jd|f}	d|	d< d|d< |d	rWJ t|||	}
|
d
 dkseJ |		d |	d d|d< d|	d< d|	d< d|	d< d|d< d|	d< d|d< t|||	}
|
d dksJ |
d dksJ |
d
 dksJ d S )N)nOnInFnPW
   r   r!      )r!   rN   pad)r   rN   r   r         ?        )r!   r!   )r!   r   )rN   r   r2   rN   )r   r   r   r   r1   )r   r!   r   r   )
r   
initialize	get_paramshapeopsallocbegin_updatehas_gradr   fill)rO   rP   rQ   rR   modeltensorYget_dXdYidsd_padr,   r,   r-   test_PrecomputableAffineh   s2    

ri   c                  C   s\   t  } trt s
J tt  tsJ ntr#t sJ tt  ts"J nt r(J t|  d S rA   )r
   r   r   rC   r   r   r   r   current_opsr,   r,   r-   test_prefer_gpu   s   


rl   c                  C   sH   t  } trt  tt  tsJ ntrt  tt  tsJ t|  d S rA   )r
   r   r   rC   r   r   r   r   rj   r,   r,   r-   test_require_gpu   s   rm   c                  C   st   t  } t  tt  tsJ zdd l}t  tt  tsJ W n	 ty(   Y nw t  tt  ts4J t|  d S )Nr   )	r
   r   rC   r   cupyr   r   ImportErrorr   )rk   rn   r,   r,   r-   test_require_cpu   s   rp   c                  C   s>   t tjj} | dD ]}tdd |jD sJ |jqdS )z{Test that all filenames in the project are ASCII.
    See: https://twitter.com/_inesmontani/status/1177941471632211968
    z**/*c                 s   s    | ]	}t |d k V  qdS )   N)ord).0cr,   r,   r-   	<genexpr>   s    z'test_ascii_filenames.<locals>.<genexpr>N)r   __file__parentgloballname)rootrD   r,   r,   r-   test_ascii_filenames   s    r|   c                  C   s   t d} | jdksJ | jg ksJ tt t d W d   n1 s(w   Y  tt t d W d   dS 1 sCw   Y  dS )z`Test that using a model name like "blank:en" works as a shortcut for
    spacy.blank("en").
    zblank:enenz	blank:zxxNzblank:fjsfijsdof)r   
load_modellangpipeliner=   r>   ro   )nlpr,   r,   r-   test_load_model_blank_shortcut   s   
"r   zversion,constraint,compatibleTz>=)3.0.0z2.0.0F)3.2.1z>=2.0.0T)z2.2.10a1z>=1.0.0,<2.1.1F)z
3.0.0.dev3>=1.2.3,<4.5.6T)n/ar   N)z1.2.3r   N)r   r   Nc                 C   s   t | ||u s
J d S rA   )r   is_compatible_version)version
constraint
compatibler,   r,   r-   test_is_compatible_version   s   r   zconstraint,expected)	)r   F)z==3.0.0F)z>=2.3.0T)z>2.0.0T)z<=2.0.0T)z>2.0.0,<3.0.0F)z>=2.0.0,<3.0.0F)z!=1.1,>=1.0,~=1.0T)r   Nc                 C   s   t | |u s	J d S rA   )r   is_unconstrained_version)r   expectedr,   r,   r-   test_is_unconstrained_version   s   r   za1,a2,b1,b2,is_match))r   3.0z3.0.1r   T)z3.1.0z3.1r   z3.2F)xxxNz
1.2.3.dev0z1.2Fc                 C   sP   t | |ks	J t ||ksJ t | ||u sJ t |||u s&J d S rA   )r   get_minor_versionis_minor_version_match)a1a2b1b2is_matchr,   r,   r-   test_minor_version   s   	r   zdot_notation,expected)z	token.posztoken._.xyztokenxyz)pos_rq   g{Gz?)ztraining.batch_sizeztraining.optimizer.learn_ratetraining
learn_rate)
batch_size	optimizerzattribute_ruler.scorer.@scorerszspacy.tagger_scorer.v1attribute_rulerscorerz@scorersc                 C   s,   t | }||ksJ t || ksJ d S rA   r   dot_to_dictdict_to_dotdot_notationr   r:   r,   r,   r-   test_dot_to_dict   s   
r   zattribute_ruler.scorerc                 C   s0   t | }||ksJ t j|dd| ksJ d S )NT)for_overridesr   r   r,   r,   r-   test_dot_to_dict_overrides  s   
r   c                  C   sr  dddiddddiid} t t t| d	d
 W d    n1 s%w   Y  t t t| dd
 W d    n1 sAw   Y  t t t| dd
 W d    n1 s]w   Y  t| dd
 | d d d
kssJ t| dddi | d d d d dksJ | d d d dksJ t| dd | d dksJ t| dd t| dddksJ d S )Nr!   xybarbazabrt   )footestzfoo.bar.bazd   zhello.worldz
test.a.b.czfoo.barr   r   z	foo.baz.xhelloworldr   r   {   )r=   r>   KeyErrorr   set_dot_to_objectdict)configr,   r,   r-   test_set_dot_to_object'  s&   r   zdoc_sizes, expected_batches)  r      )r   r   r   r0   )r   r   r   r0      )r   r   r   r0   r!   )r   r   r   r0   r!   i  )r   r   r   r0   r!   r   )r   r   r   r0   r!     )r   r   r   r0   r!   r   r   )r0   rN   r!   r!   )r!   rN   r   )r!   rN   r   r!   )r!   r   r   r!   )r!   r   r   r!   c                 C   st   dd | D }d}d}t t|||dd}dd |D |ks J |||  }|D ]}tdd |D |k s7J q(d S )	Nc                 S      g | ]}t |qS r,   r"   rs   doc_sizer,   r,   r-   
<listcomp>L      z'test_util_minibatch.<locals>.<listcomp>皙?  Tsize	tolerancediscard_oversizec                 S   r   r,   lenrs   batchr,   r,   r-   r   R  r   c                 S   r   r,   r   )rs   r6   r,   r,   r-   r   V  r   )listr   sum)	doc_sizesexpected_batchesdocstolr   batchesmax_sizer   r,   r,   r-   test_util_minibatch:  s   r   )r   i  r   )r   r   r     r   )r   r   r   r     r   r   )r!   r!   r0   rN   )r!   rN   i'  )r   r!   r   r!   r!   r!   r   )r!   r!   r!   r/   c                 C   sD   dd | D }d}d}t t|||dd}dd |D |ks J dS )	z=Test that oversized documents are returned in their own batchc                 S   r   r,   r   r   r,   r,   r-   r   f  r   z0test_util_minibatch_oversize.<locals>.<listcomp>r   r   Fr   c                 S   r   r,   r   r   r,   r,   r-   r   l  r   N)r   r   )r   r   r   r   r   r   r,   r,   r-   test_util_minibatch_oversizeY  s   r   c                  C   sd  d} t  | }tj|dd}t  t}d|d d< tj|dd}t|ts)J t|ts0J |j	g ks7J |j	dgks?J |
djjd d	u sLJ |jd d
 dgksXJ |jd d
 g kscJ tt t|jd W d    n1 syw   Y  tt t|jd W d    n1 sw   Y  tjj|jd td}ttd|idtsJ d S )Na)  
    [nlp]
    lang = "en"
    pipeline = ["textcat"]

    [components]

    [components.textcat]
    factory = "textcat"

    [components.textcat.model]
    @architectures = "spacy.TextCatBOW.v3"
    exclusive_classes = true
    length = 262144
    ngram_size = 1
    no_output_layer = false
    T)	auto_fillnlr   r   textcatmulti_labelFr   znlp.pipeline.taggerznlp.unknownattributer   )schematraining.optimizer)r   from_strr   load_model_from_config	from_diskr   rC   r   r   
pipe_namesget_piperb   attrsr   r=   r>   r   r   registryresolver   r	   )
cfg_string
nlp_configen_nlpdefault_confignl_nlpTr,   r,   r-   test_util_dot_sectiono  s*   r   c                  C   sF  t ddg} | ddgksJ | ddksJ tt | d W d    n1 s,w   Y  tt |   W d    n1 sEw   Y  tt | dg W d    n1 s`w   Y  tt |   W d    n1 syw   Y  t ddgdd} tt | d W d    d S 1 sw   Y  d S )Nr   r   r!   r   zError!)error)	r   indexr=   r>   NotImplementedErrorappendsortextendpop)tr,   r,   r-   test_simple_frozen_list  s&   

"r   c                  C   s   dddiidddd} t | dg}t|d tsJ tt}t | ddg W d    n1 s4w   Y  |jj}t	|d	ksEJ |d d
 ddgksQJ d S )Nr   z@optimizerszAdam.v1r   ztraining.xyzr   )r   r   r   r!   locr   r   )
r   resolve_dot_namesrC   r	   r=   r>   r   valueerrorsr   )r   r:   er   r,   r,   r-   test_resolve_dot_names  s   
r   c               	   C   s   d} t  G}tj|d}t|d}||  W d    n1 s"w   Y  td| ddddd	iiii}t|}|	d |
  W d    d S 1 sOw   Y  d S )
NaG  
from spacy import Language

class DummyComponent:
    def __init__(self, vocab, name):
        pass

    def initialize(self, get_examples, *, nlp, dummy_param: int):
        pass

@Language.factory(
    "dummy_component",
)
def make_dummy_component(
    nlp: Language, name: str
):
    return DummyComponent(nlp.vocab, name)
zcode.pywpython_coderZ   
componentsdummy_componentdummy_paramr!   )r#   r$   rD   joinopenwriter   r   from_configadd_piperZ   )code_strtemp_dir	code_pathfilehr   r   r,   r,   r-   test_import_code  s   



"r  c                   C   s   t ddksJ t d dksJ t ddksJ t ddks J t ddks(J t ddks0J t ddks8J t ddks@J t ddksHJ t d	dksPJ t d
dksXJ t dd
gdksbJ d S )NTr!   r   FrV   rX   rY   r2   istring)r    r,   r,   r,   r-   test_to_ternary_int  s   r  c               	   C   s   d} d}t || |ksJ dddlm}m} || ||1}tjtdd t || dd	}W d    n1 s7w   Y  ||d
 ksFJ dW d    d S 1 sQw   Y  d S )Nz0.0.0.0i  zPort 5001 isn't freer   )demo_appmake_serverzalready in use)matchT)auto_selectr!   zDidn't find next port)r   wsgiref.simple_serverr  r  r=   warnsUserWarning)hostportr  r  httpd
found_portr,   r,   r-   test_find_available_port  s   "r  )r/   r2   r0   rN   )Rr'   r$   pathlibr   r=   pydantic.v1r   ro   pydantic	thinc.apir   r   r   r   r   r	   r
   r   thinc.compatr   r   spacyr   r   r   r   spacy.aboutr   spacy_versionspacy.lang.enr   spacy.lang.nlr   spacy.languager   spacy.ml._precomputable_affiner   r   spacy.schemasr   r   r   spacy.training.batchersr   
spacy.utilr   r   r   r   r    r"   r#   fixturer.   markissuer;   r?   parametrizerE   rJ   rM   ri   rl   rm   rp   r|   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r,   r,   r,   r-   <module>   s    (











	







* 