o
    i                     @   s6  d dl Z d dlZd dlmZ d dlmZ d dlmZmZm	Z	m
Z
 d dlZd dlZd dlmZ d dlmZ d dlmZ d dlZd dlmZ d d	lmZmZ d d
lmZmZmZ d dlmZ d dlmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z& d dl'm(Z(m)Z) d dl*m+Z+ d dl,m-Z- d dl.m/Z/m0Z0m1Z1 d dl2m3Z3 d dl4m5Z5m6Z6 d dl7m8Z8 d dl9m:Z: d dl;m<Z< d dl=m>Z> d dl?m@Z@ d dlAmBZBmCZC d dlDmEZE d dlFmGZGmHZHmIZI d dlJmKZKmLZLmMZM d dlNmOZOmPZPmQZQmRZR ddlSmTZT ejUVdd d! ZWejUVd"d#d$ ZXejUVd%d&d' ZYejUVd(ejUZd)g d*d+e[d,e[fd-d.Z\d/d0 Z]d1d2 Z^ejUZd3d4d5gd6d7 Z_d8d9 Z`d:d; Zad<d= ZbejUZd>d?d@gdAdBifdCgdAdBifd?dDgdAdDifdEgdAdDifg dFdGdHdIfd?dJgdGdHdIfg dKdLdGdMdNfg dOdLdGdMdNfgdPdQ ZcejUZdRdSgg dTgdUdV ZdejUZdRg dWdAggdXdY ZedZd[ ZfejUZd\d]d^gejUZd_g d`g g dag dbdcddggejUZdedfdggejUZdhdGdMgdidj Zgdkdl ZhejUZdmg dndodp ZiejUZdmg dqdrds Zjdtdu Zkdvdw ZlejUZdxg dydzd{ Zmd|d} ZnejUjoejUZd~g ddd Zpdd Zqdd ZrejUZdxddcgdd Zsdd Ztdd Zudd Zvdd ZwejUZdg ddd Zxdd Zydd Zzdd Z{dd Z|dd Z}dd Z~dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd ZdS )    N)Counter)Path)AnyDictListTuple)NoSuchOption)SpecifierSet)Config)about)download_moduleinfo)parse_config_overridesstring_to_listwalk_directory)apply)_compile_gold_get_distribution_get_kl_divergence_get_labels_from_model_get_labels_from_spancat_get_span_characteristics_get_spans_length_freq_dist_print_span_characteristics)get_compatibilityget_version)render_parses)find_threshold)RECOMMENDATIONSfill_configinit_config)_init_labels)_is_permitted_package_nameget_third_party_dependencies)get_model_pkgs)English)Dutch)Language)RecommendationSchema)DocDocBin)Span)Exampledocs_to_jsonoffsets_to_biluo_tags)conll_ner_to_docsconllu_to_docsiob_to_docs)ENV_VARSget_minor_versionload_configload_model_from_config   )make_tempdiri9  c                  C   s@   d} t t| }tdd |d D rJ |d drJ dS )zd
    conllu_to_docs should not raise an exception if the HEAD column contains an
    underscore
    a%  
1	[	_	PUNCT	-LRB-	_	_	punct	_	_
2	This	_	DET	DT	_	_	det	_	_
3	killing	_	NOUN	NN	_	_	nsubj	_	_
4	of	_	ADP	IN	_	_	case	_	_
5	a	_	DET	DT	_	_	det	_	_
6	respected	_	ADJ	JJ	_	_	amod	_	_
7	cleric	_	NOUN	NN	_	_	nmod	_	_
8	will	_	AUX	MD	_	_	aux	_	_
9	be	_	AUX	VB	_	_	aux	_	_
10	causing	_	VERB	VBG	_	_	root	_	_
11	us	_	PRON	PRP	_	_	iobj	_	_
12	trouble	_	NOUN	NN	_	_	dobj	_	_
13	for	_	ADP	IN	_	_	case	_	_
14	years	_	NOUN	NNS	_	_	nmod	_	_
15	to	_	PART	TO	_	_	mark	_	_
16	come	_	VERB	VB	_	_	acl	_	_
17	.	_	PUNCT	.	_	_	punct	_	_
18	]	_	PUNCT	-RRB-	_	_	punct	_	_
c                 S   s   g | ]}|j jqS  )headi.0tr8   r8   H/home/ubuntu/.local/lib/python3.10/site-packages/spacy/tests/test_cli.py
<listcomp>L       z>test_cli_converters_conllu_empty_heads_ner.<locals>.<listcomp>r   ENT_IOBN)listr0   allhas_annotation)
input_datadocsr8   r8   r>   *test_cli_converters_conllu_empty_heads_ner0   s   rG   i<  c                  C   s(   t  } t| di }| |g d S )N )r'   r,   	from_dictmake_docevaluate)nlpexampler8   r8   r>   test_issue4924Q   s   rN   i  c                  C   s4  dddgdddiddidd} t | }t E}|d }|| dg d	dd
t|id
t|iddid	d}t|}|d }|| |d }t||dd t|}W d   n1 saw   Y  |d d d
 t|kstJ |d d d
 t|ksJ |d d d dksJ d|d d v sJ dS )zETest that fill-config doesn't turn sourced components into factories.entok2vectagger)langpipelinefactory)rP   rQ   )rL   
components
test_model)rP   rQ   nersourcerW   zbase.cfgz
config.cfgT)silentNrU   model)r%   from_configr7   to_diskstrr
   r   r4   )
source_cfg
source_nlpdir_pathsource_pathbase_cfg	base_pathoutput_path
filled_cfgr8   r8   r>   test_issue7055X   s4   





rf   i1  zfactory,output_file))depszparses.html)entszentities.html)spansz
spans.htmlrT   output_filec                 C   s  t  }dddddddddd	d
dgdddddddddddddd	d
dgiddddddddddd	dddddddddd	ddd d!d"d#d$d%dd	d&d'd(d)d)d*d+d,d-d	d-d.d/d0d"d1d2d3dd	d4d5d6d)d)d7d8d9d-d	d:ddd)d)d;d<d,d=d	d=d>d?d0d"d@dAd%d4d	ddBdCdDdEdFdGdHdd	dddId0dJd@dKdLd-d	dMdNdd0dJd@dOdPdd	dQdRdSdDdEdTdUdHdVd	dVd
dWd0dJdXdYdZd-d	d[d\dd0d"dXd]dPdVd	d^d_d`dDdEdadbdHdd	gdc}tdd}t|j|}tdh|g|ddde| dfi ||  sJ W dg   dgS 1 sw   Y  dgS )izL
    Test if all displaCy types (ents, dep, spans) produce an HTML file
    6   nam_adj_country,   )endlabelstartS   nam_liv_personE   d   nam_pro_title_bookV   scrH   )rn   kb_idro   rp   uk   Niedawno czytał em nową książkę znakomitego szkockiego medioznawcy , Briana McNaira - Cultural Chaos .r      ADVz
Degree=Posniedawnoadvmodr6   )	idrp   rn   tagposmorphlemmadepr9   	      PRAETVERBzYAnimacy=Hum|Aspect=Imp|Gender=Masc|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Actu   czytaćROOT         AGLTNOUNz-Animacy=Inan|Case=Ins|Gender=Masc|Number=Singemiobj         ADJz*Case=Acc|Degree=Pos|Gender=Fem|Number=Singnowyamod         SUBSTzCase=Acc|Gender=Fem|Number=Singu	   książkaobj       +   z8Animacy=Nhum|Case=Gen|Degree=Pos|Gender=Masc|Number=Singznakomitacl   z7Animacy=Hum|Case=Gen|Degree=Pos|Gender=Masc|Number=Singszkockiy   7   B   z,Animacy=Hum|Case=Gen|Gender=Masc|Number=SingmedioznawcaC   D   INTERPPUNCTzPunctType=Comm,punctK   PROPNBriannmod
   L   McNairflat   T   U   zPunctType=Dash-   ^   z-Animacy=Inan|Case=Nom|Gender=Masc|Number=SingCulturalconj   _   Chaos   e   f   zPunctType=Peri.)rh   ri   texttokenspl)rF   rd   
model_namelimitTNr8   )r7   spacyblankr)   vocab	from_jsonr   is_file)rT   rj   tmp_dirdoc_jsonrL   docr8   r8   r>   test_issue12566{   sT   	



*
$r   c                  C   sv   t  } | d t &}| | t|dgd}|d dks J |d dgks)J W d    d S 1 s4w   Y  d S )NtextcatrH   )excluderR   nlrU   )r&   add_piper7   r\   r   )rL   r   raw_datar8   r8   r>   test_cli_info   s   

"r   c                  C   sh  g d} d | }tt|dd}t|dksJ t|g}|d d dks(J t|d d dks4J t|d d d d dksDJ |d d d d d }t|d	 d
ksZJ |d	 }dd |D g dkskJ dd |D g dksxJ dd |D g dksJ dd |D g dksJ dd |d d d d D }t|d |dd}|g dksJ d S )N)zG1	Dommer	dommer	NOUN	_	Definite=Ind|Gender=Masc|Number=Sing	2	appos	_	Oz/2	Finn	Finn	PROPN	_	Gender=Masc	4	nsubj	_	B-PERz.3	Eilertsen	Eilertsen	PROPN	_	_	2	name	_	I-PERuC   4	avstår	avstå	VERB	_	Mood=Ind|Tense=Pres|VerbForm=Fin	0	root	_	O
r6   n_sentsr   r}   
paragraphs	sentencesr   r   c                 S      g | ]}|d  qS orthr8   r;   r8   r8   r>   r?      r@   z6test_cli_converters_conllu_to_docs.<locals>.<listcomp>)DommerFinn	Eilertsen   avstårc                 S   r   r~   r8   r;   r8   r8   r>   r?      r@   )r   r   r   r   c                 S   r   r9   r8   r;   r8   r8   r>   r?      r@   )r6   r   r   c                 S   r   r   r8   r;   r8   r8   r>   r?      r@   )apposnsubjnamer   c                 S   "   g | ]}|d  |d |d fqS r   r6   r   r8   r<   er8   r8   r>   r?          entitiesOmissing)r   zB-PERzL-PERr   joinrB   r0   lenr-   r.   linesrE   converted_docs	convertedsentr   ent_offsets
biluo_tagsr8   r8   r>   "test_cli_converters_conllu_to_docs   s(   

 r   r   )L1	Dommer	dommer	NOUN	_	Definite=Ind|Gender=Masc|Number=Sing	2	appos	_	name=OzB2	Finn	Finn	PROPN	_	Gender=Masc	4	nsubj	_	SpaceAfter=No|name=B-PERz33	Eilertsen	Eilertsen	PROPN	_	_	2	name	_	name=I-PERV   4	avstår	avstå	VERB	_	Mood=Ind|Tense=Pres|VerbForm=Fin	0	root	_	SpaceAfter=No|name=Oz%5	.	$.	PUNCT	_	_	4	punct	_	name=B-BAD)zG1	Dommer	dommer	NOUN	_	Definite=Ind|Gender=Masc|Number=Sing	2	appos	_	_z@2	Finn	Finn	PROPN	_	Gender=Masc	4	nsubj	_	SpaceAfter=No|NE=B-PERz13	Eilertsen	Eilertsen	PROPN	_	_	2	name	_	NE=L-PERuO   4	avstår	avstå	VERB	_	Mood=Ind|Tense=Pres|VerbForm=Fin	0	root	_	SpaceAfter=Noz#5	.	$.	PUNCT	_	_	4	punct	_	NE=B-BADc                 C   s  d | }tt|ddddd}t|dksJ t|g}|d d dks(J t|d d	 dks4J |d d	 d d
 dksBJ t|d d	 d d dksRJ |d d	 d d d }t|d dkshJ |d }dd |D g dksyJ dd |D g dksJ dd |D g dksJ dd |D g dksJ dd |d d	 d d D }t|d |dd}|g dksJ d S )Nr   r6   PERSONrH   )PERBAD)r   ner_mapr   r}   r   rawu   Dommer FinnEilertsen avstår. r   r   r   c                 S   r   r   r8   r;   r8   r8   r>   r?     r@   zCtest_cli_converters_conllu_to_docs_name_ner_map.<locals>.<listcomp>)r   r   r   r   r   c                 S   r   r   r8   r;   r8   r8   r>   r?     r@   )r   r   r   r   r   c                 S   r   r   r8   r;   r8   r8   r>   r?     r@   )r6   r   r   r   r   c                 S   r   r   r8   r;   r8   r8   r>   r?     r@   )r   r   r   r   r   c                 S   r   r   r8   r   r8   r8   r>   r?     r   r   r   r   )r   zB-PERSONzL-PERSONr   r   r   r   r8   r8   r>   /test_cli_converters_conllu_to_docs_name_ner_map   s,   

 r  c                  C   s  g d} d | }tt|dddd}t|dksJ t|g}|d d dks*J t|d d dks6J |d d d d	 d
ksDJ t|d d d d dksTJ |d d d d d }t|d dksjJ |d }dd |D g dks{J dd |D g dksJ dd |D g dksJ dd |D g dksJ dd |D g dksJ dd |D g dksJ dd |D g dksJ dd |d d d d D }t|d |dd }|g d!ksJ d S )"N)r   z2-3	FE	_	_	_	_	_	_	_	_z42	Finn	Finn	PROPN	_	Gender=Masc	4	nsubj	_	name=B-PERzC3	Eilertsen	Eilertsen	X	_	Gender=Fem|Tense=past	2	name	_	name=I-PERr   z!5	.	$.	PUNCT	_	_	4	punct	_	name=Or   r6   T)r   merge_subtokensappend_morphologyr   r}   r   r  u   Dommer FE avstår. r   r   r   c                 S   r   r   r8   r;   r8   r8   r>   r?   (  r@   z@test_cli_converters_conllu_to_docs_subtokens.<locals>.<listcomp>)r   FEr   r   c                 S   r   r   r8   r;   r8   r8   r>   r?   )  r@   )z*NOUN__Definite=Ind|Gender=Masc|Number=Singz#PROPN_X__Gender=Fem,Masc|Tense=pastz&VERB__Mood=Ind|Tense=Pres|VerbForm=Finr   c                 S   r   )r   r8   r;   r8   r8   r>   r?   /  r@   )r   r   r   r   c                 S   r   )r   r8   r;   r8   r8   r>   r?   0  r@   )z$Definite=Ind|Gender=Masc|Number=SingzGender=Fem,Masc|Tense=pastz Mood=Ind|Tense=Pres|VerbForm=FinrH   c                 S   r   )r   r8   r;   r8   r8   r>   r?   6  r@   )dommerzFinn Eilertsenu   avståz$.c                 S   r   r   r8   r;   r8   r8   r>   r?   7  r@   )r6   r6   r   r   c                 S   r   r   r8   r;   r8   r8   r>   r?   8  r@   )r   r   r   r   c                 S   r   r   r8   r   r8   r8   r>   r?   9  r   r   r   r   )r   zU-PERr   r   r   r   r8   r8   r>   ,test_cli_converters_conllu_to_docs_subtokens  s8   

 r	  c            	      C   s  g d} d | }tt|dd}t|dksJ t|}|d dks%J t|d dks/J t|d d d	 d
ks=J tdd
D ])}|d d d	 | }t|d dksXJ |d }g d}dd |D |kskJ qBt|d jdkswJ |d jD ]	}|jdv sJ q|d S )N)zAI|O like|O London|I-GPE and|O New|B-GPE York|I-GPE City|I-GPE .|OzAI|O like|O London|B-GPE and|O New|B-GPE York|I-GPE City|I-GPE .|Oz^I|PRP|O like|VBP|O London|NNP|I-GPE and|CC|O New|NNP|B-GPE York|NNP|I-GPE City|NNP|I-GPE .|.|Oz^I|PRP|O like|VBP|O London|NNP|B-GPE and|CC|O New|NNP|B-GPE York|NNP|I-GPE City|NNP|I-GPE .|.|Or   r   r   r6   r}   r   r   r   r   r   ry   IlikeLondonandNewYorkCityr   c                 S   r   r   r8   r;   r8   r8   r>   r?   S  r@   z3test_cli_converters_iob_to_docs.<locals>.<listcomp>New York Cityr  )r   rB   r1   r   r-   rangerh   r   )	r   rE   r   r   r:   r   r   expectedentr8   r8   r>   test_cli_converters_iob_to_docs@  s$   
r  c                  C   s  g d} d | }tt|dd}t|dksJ t|}|d dks%J t|d dks/J t|d d d	 d
ks=J tdd
D ]'}|d d d	 | }t|d dksXJ |d }dd |D g dksiJ qBt|d jdksuJ |d jD ]	}|jdv sJ qzd S )N).z-DOCSTART- -X- O OrH   zI	Ozlike	OzLondon	B-GPEzand	Oz	New	B-GPEz
York	I-GPEz
City	I-GPEz.	OrH   zI Ozlike OzLondon B-GPEzand Oz	New B-GPEz
York I-GPEz
City I-GPEz. OrH   zI PRP Oz
like VBP OzLondon NNP B-GPEzand CC OzNew NNP B-GPEzYork NNP I-GPEzCity NNP I-GPEz. . OrH   z	I PRP _ Ozlike VBP _ OzLondon NNP _ B-GPEz
and CC _ OzNew NNP _ B-GPEzYork NNP _ I-GPEzCity NNP _ I-GPEz. . _ OrH   z	I	PRP	_	Ozlike	VBP	_	OzLondon	NNP	_	B-GPEz
and	CC	_	OzNew	NNP	_	B-GPEzYork	NNP	_	I-GPEzCity	NNP	_	I-GPEz.	.	_	Or   r   r   r6   r}   r   r   r   r   r   ry   c                 S   r   r   r8   r;   r8   r8   r>   r?     r@   z9test_cli_converters_conll_ner_to_docs.<locals>.<listcomp>r
  r  )r   rB   r/   r   r-   r  rh   r   )r   rE   r   r   r:   r   r   r  r8   r8   r>   %test_cli_converters_conll_ner_to_docsY  s"   
0r  zargs,expected--x.foo10x.foor   z
--x.foo=10barz--x.foo=bar)r  --x.barbazTr  )r  x.barz--x.bar=baz)r  10.1r  z--x.bazfalseg333333$@F)r  r  x.baz)r  r   r  z--x.baz=falsec                 C   s   t | |ksJ d S N)r   )argsr  r8   r8   r>   test_parse_config_overrides  s   r%  r$  z--foo)r  r  z--bazc                 C   8   t t t|  W d    d S 1 sw   Y  d S r#  )pytestraisesr   r   r$  r8   r8   r>   #test_parse_config_overrides_invalid     
"r*  )r  r  r  c                 C   r&  r#  )r'  r(  
SystemExitr   r)  r8   r8   r>   %test_parse_config_overrides_invalid_2  r+  r-  c                  C   s  d} | t jtj< tg }t|dksJ |d dksJ |d dks$J |d du s,J |d	 d
ks4J dt jtj< tg d di ksDJ tt tg  W d    n1 sXw   Y  dt jtj< tt tg  W d    n1 sww   Y  t jtj= d S )Nz2--x.foo bar --x.bar=12 --x.baz false --y.foo=hellor   r  r  r  r   r"  Fzy.foohelloz--x)env_varzhello world)	osenvironr2   CONFIG_OVERRIDESr   r   r'  r(  r,  )	overridesresultr8   r8   r>   test_parse_cli_overrides  s$   

r5  rR   rO   r   rS   )rQ   parserrW   )rW   r   sentencizer)morphologizerspancatentity_linkerspancat_singlelabeltextcat_multilabeloptimize
efficiencyaccuracypretrainingc                 C   s@   t | |||dd}t|tsJ |rd|d d< t|dd d S )NF)rR   rS   r=  r@  gpuzmy_data.jsonlpathsraw_textT	auto_fill)r    
isinstancer
   r5   )rR   rS   r=  r@  configr8   r8   r>   test_init_config  s   rH  c                  C   s(   t  D ]\} }tdi |sJ qd S )Nr8   )r   itemsr(   )rR   datar8   r8   r>   test_model_recommendations  s   rK  value)parser,textcat,tagger parser, textcat ,tagger rM  rN  z  "parser"," textcat " ,"tagger "z  'parser',' textcat ' ,'tagger '[parser,textcat,tagger]z["parser","textcat","tagger"]z$[" parser" ,"textcat ", " tagger " ]rO  z[ parser, textcat , tagger]z['parser','textcat','tagger']z$[' parser' , 'textcat', ' tagger ' ]c                 C   s   t | ddg dksJ d S )NFintify)r6  r   rQ   r   rL  r8   r8   r>   test_string_to_list  s   rT  )z1,2,3z[1,2,3]z["1","2","3"]z[" 1" ,"2 ", " 3 " ]z[' 1' , '2', ' 3 ' ]c                 C   s4   t | ddg dksJ t | ddg dksJ d S )NFrP  )123T)r6   r   r   rR  rS  r8   r8   r>   test_string_to_list_intify  s   rX  c                  C   sP   t dtj } d| _tj| v r$d}t }t||}ttjt|ks&J d S d S )N==Fen_core_web_sm)r	   r   __version__prereleasesr   r   r3   )specr   compatibilityversionr8   r8   r>   test_download_compatibility  s   

r`  c                  C   sb   t dtj } d| _tj| v r-t \}}ttj}||i }t|dks'J d|v s/J d S d S )NrY  Fr   rZ  )r	   r   r[  r\  r$   r3   getr   )r]  
model_pkgscompatspacy_versioncurrent_compatr8   r8   r>   !test_validate_compatibility_table#  s   


rf  component_name)rW   r   r9  rQ   c                 C   s   t  }|| }dD ]}|| q
t|| jdksJ t L}t|| td| gddd}dd| d	|  d
di|d d | < t	|dd}t|| jdksUJ |
  t|| jdkseJ W d    d S 1 spw   Y  d S )N)T1T2T3T4r   r   r>  F)rR   rS   r=  rA  labelszspacy.read_labels.v1/z.json)z@readerspath
initializerU   TrD  r   )r&   r   	add_labelr   get_piperl  r7   r!   r    r5   ro  )rg  rL   	componentro   r   rG  nlp2r8   r8   r>   test_init_labels.  s.   

"rt  c                  C   s   t  } | d t| jg ksJ t  } | jdddddddid	 t| jg ks+J t d
dd }| d
 t| j d S )NrQ   r   rZ   zspacy.TextCatBOW.v1Tr6   F)z@architecturesexclusive_classes
ngram_sizeno_output_layerrG  third_party_testc                 S   s   dd S )Nc                 S   s   | S r#  r8   )xr8   r8   r>   <lambda>g      zItest_get_third_party_dependencies.<locals>.test_factory.<locals>.<lambda>r8   )rL   r   r8   r8   r>   test_factorye  s   z7test_get_third_party_dependencies.<locals>.test_factory)r&   r   r#   rG  rT   )rL   r}  r8   r8   r>   !test_get_third_party_dependenciesL  s&   


r~  zfactory_name,pipe_name))rW   rW   )rW   my_ner)r9  r9  )r9  
my_spancatc                 C   s   d}t  }|j| |d}|D ]}|| q|  ||j|ks$J | dkr7t||j t|ks5J d S t	|| t|ksBJ d S )N)AB)r   r9  )
r%   r   rp  ro  rq  rl  r   keysetr   )factory_name	pipe_namerl  rL   pipero   r8   r8   r>   test_get_labels_from_modeln  s   r  c                   C   st   t ddksJ t ddksJ t ddksJ t ddks J t ddks(J t ddks0J t ddks8J d S )	Nu   Meine_BäumeF_packagepackage_z.packagezpackage.z-packagezpackage-)r"   r8   r8   r8   r>   test_permitted_package_names  s   r  c                  C   s   t  } t| jg dd}t| jg dg dg dd}t||}t|gdg| d}|d	 d
ks1J t| jg dd}t| jg dg dg dd}t||}t|gdg| d}|d	 dks_J d S )N)Tokenr   r  r  r  words)r  r   r  )TFT)r   r   B-ENT)r  sent_startsrh   rW   Tboundary_cross_entsr   )r   r  zI-ENTr6   )r%   r)   r   r,   r   )rL   predrefegrJ  r8   r8   r>   test_debug_data_compile_gold  s*   

r  r9  c                 C   sN  t  }d}t|jg dd}t|dddt|dddg|j|< t|jg dd}t|dddt|dddg|j|< t||}t|g| g|d	}|d
 | tdddksVJ |d | dgdgdkseJ |d | t|dddgt|dddgdks~J |d | |dd g|dd gd|dd g|dd gddksJ d S )Nrw   WelcometotheBankofChinar   r  r   r   ORGr   GPETr9  r6   )r  r  spans_lengthspans_per_typesb_per_typer   r   )rp   rn   r   )r%   r)   r   r+   ri   r,   r   r   )rg  rL   	spans_keyr  r  r  rJ  r8   r8   r>   &test_debug_data_compile_gold_for_spans  s$   ""


r  c                  C   sT   t  } t| jg ddt| jdgdg}tdddd}t|dd}||ks(J d S )	N)r  r  r  r  r        ?      ?)chinabankr  T)	normalize)r%   r)   r   r   r   )rL   rF   r  freq_distributionr8   r8   r>   &test_frequency_distribution_is_correct  s   r  c                  C   sF   t ddd} t ddddd}t| |}d}tj||dd	s!J d S )
Nr  r  )abg333333?g?)r  r  cdgX2ı.?gMbP?)rel_tol)r   r   mathisclose)pqr4  r  r8   r8   r>   )test_kl_divergence_computation_is_correct  s
   
r  c                  C   s   t  } d}t| jg dd}t|dddt|dddg|j|< t| jg dd}t|dddt|dddg|j|< t||}|g}t|d	g| d
}t|||d}h d|	 s[J |d dkscJ |d dkskJ d S )Nrw   r  r  r   r   r  r   r  r9  Texamplescompiled_goldr  >   bdsdlengths
min_lengthr6   
max_length)
r%   r)   r   r+   ri   r,   r   r   issubsetkeysrL   r  r  r  r  r  rJ  span_characteristicsr8   r8   r>   *test_get_span_characteristics_return_value  s   ""
r  c                  C   s   t  } d}t| jg dd}t|dddt|dddg|j|< t| jg dd}t|dddt|dddg|j|< t||}|g}t|d	g| d
}t|||d}t| dS )zDTest if interface between two methods aren't destroyed if refactoredrw   r  r  r   r   r  r   r  r9  Tr  N)	r%   r)   r   r+   ri   r,   r   r   r   r  r8   r8   r>   0test_ensure_print_span_characteristics_wont_fail  s   ""
r  	threshold)F   P   r   Z   r   c                 C   s:   g dg dg dd}t || }t| | ksJ d S )Nr6   r   r   r   r   r   r   r   r   r6   r   r   span_type_1span_type_2span_type_3)r   sumvalues)r  sample_span_lengths
span_freqsr8   r8   r>   4test_span_length_freq_dist_threshold_must_be_correct   s   
r  c                  C   sV   g dg dg dd} d}t | |}t| |ksJ t| g dks)J d S )Nr  r  r  r  r  )r   r6   r   r   r   )r   r  r  rB   r  )r  r  r  r8   r8   r>   1test_span_length_freq_dist_output_must_be_correct  s   
r  c                  C   sF   t  } | d }t| |dddd W d    d S 1 sw   Y  d S )Nz
test.spacyblank:enr   r6   )r7   r   )	data_pathoutputr8   r8   r>   test_applycli_empty_dir  s   "r  c                  C   s   t  >} | d }td}|d}t }|| d  t| |dddd || || d  t| |dddd W d    d S 1 sDw   Y  d S )Ntestout.spacyrO   testing apply cli.testin.spacyr  r   r6   )r7   r   r   r*   r\   r   add)r  r  rL   r   docbinr8   r8   r>   test_applycli_docbin  s   

"r  c                  C   s   t  :} | d }dddg}ddig}t| d | t| |ddd	d	 t| d
 | t| |ddd	d	 W d    d S 1 s@w   Y  d S )Nr  Testing apply cli.   )fieldr  r  234
test.jsonlr  r6   ztest2.jsonl)r7   srslywrite_jsonlr   )r  r  rJ  data2r8   r8   r>   test_applycli_jsonl+  s   
"r  c               	   C   s~   t  2} | d }t| d d}|d W d    n1 sw   Y  t| |dddd W d    d S 1 s8w   Y  d S )Nr  ztest.foowr  r  r   r6   )r7   openwriter   )r  r  ftestr8   r8   r>   test_applycli_txt6  s   "r  c            	   	   C   s  t  y} | d }d}td}||}d|ig}t| d | t }|| || d  t| d d}|	| W d    n1 sFw   Y  t
| |d	dd
d
 tt ||j}t|dkshJ |D ]	}|j|kssJ qjW d    d S 1 sw   Y  d S )Nr  zTesting apply clirO   r   r  r  ztest.txtr  r  r6   r   )r7   r   r   r  r  r*   r  r\   r  r  r   rB   	from_diskget_docsr   r   r   )	r  r  r   rL   r   
jsonl_datar  r  r4  r8   r8   r>   test_applycli_mixed>  s(   


"r  c                  C   s   t jddd d} t J}|d }td}|d}| |j_tdd	}|| |	|d
  t
||dddd tt ||j}|d jj| ksNJ W d    d S 1 sYw   Y  d S )Nextr   )default)r  r   r  rO   r  T)store_user_datar  r  rH   r6   )r)   set_extensionr7   r   r   _r  r*   r  r\   r   rB   r  r  r   )valr  r  rL   r   r  r4  r8   r8   r>   test_applycli_user_dataS  s   


"r  c           
         s  dt dtt fdd 	ddttttttf f df dtt tt f f fdd	}t }| \}}td
d |D d	|d  t .}|	| t
||d ddddd\}}}|t| kseJ |d dksmJ W d    n1 sww   Y  |di ff\}}	t .}|	| t
||d ddddd\}}}|t| ksJ |d dksJ W d    n1 sw   Y  |di ff\}}	t }|	| t
||d dddddsJ W d    n1 sw   Y  | \}}	t /}|	| tt t
||d ddddd W d    n	1 sw   Y  W d    n1 s(w   Y  W d    d S W d    d S 1 sAw   Y  d S )NrL   returnc                 S   sj   g }dddddddgidfddddddd	gidffD ]}|  |d
 }|t||d  q|S )Nz/I am angry and confused in the Bank of America.      ?        )ANGRYCONFUSEDHAPPYrw   )r   .   r  )catsri   z$I am confused but happy in New York.)   #   r  r   r6   )rJ   appendr,   rI   )rL   rF   r=   r   r8   r8   r>   make_examplesd  s   

z.test_cli_find_threshold.<locals>.make_examplesr8   rU   .c                    sr   t  }|jddddid | D ]\}}|j||d q| |j fddd	 td
D ]}|  q-| fS )Nr<  tc_multir  g?)r  r   rG  rx  c                      s    S r#  r8   r8   new_examplesr8   r>   r{    r|  z;test_cli_find_threshold.<locals>.init_nlp.<locals>.<lambda>)get_examplesr   )r%   r   ro  r  update)rU   new_nlpcfncomp_configr:   r  r  r>   init_nlp|  s   z)test_cli_find_threshold.<locals>.init_nlpc                 S   s   g | ]}|j qS r8   )	reference)r<   rM   r8   r8   r>   r?     s    z+test_cli_find_threshold.<locals>.<listcomp>)rF   z
docs.spacyr  r  cats_macro_fT)rZ   r  r  threshold_key
scores_keyrY   r  r  r9  
spans_sc_fr<  r  )r8   )r'   r   r,   r   r]   r   r   r7   r*   r\   r   maxr  r'  r(  AttributeError)
capsysr  docs_dirrL   r  nlp_dirbest_threshold
best_scoreresr  r8   r  r>   test_cli_find_thresholdc  s   






 3$r  c                  C   s   t  `} g d}|D ]
}t| |   q
tt| dksJ tt| d ddks+J tt| dddks7J tt| dddksCJ tt| ddd	ksOJ tt| d
ddks[J W d    d S 1 sfw   Y  d S )N)z	data1.iobz	data2.iobz
data3.jsonzdata4.conllzdata5.conllzdata6.conllz	data7.txtr   )suffixjsonr6   iobr   conllr   pdfr   )r7   r   touchr   r   )r  filesfr8   r8   r>   test_walk_directory  s   
"r#  c               	   C   s|   ddg difddg difg} t  }g }| D ]}|t||d |d  qt|dg|d	}t|d
 dks<J d S )NShe likes green eggslemmas)sher  greeneggEat blue ham)eatbluehamr   r6   trainable_lemmatizerTlemmatizer_treesr   )r'   r  r,   rI   rJ   r   r   )r  rL   train_examplesr=   rJ  r8   r8   r>   *test_debug_data_trainable_lemmatizer_basic  s   "r0  c               	   C   s~   ddg difdg dg ddfg} t  }g }| D ]}|t||d |d	  qt|d
g|d}|d dks=J d S )Nr$  r%  )rH   r  r'  rH   zHe hates green eggs)Hehatesr'  eggs)rH   r2  r   r'  rH   )r  r%  r   r6   r-  Tpartial_lemma_annotationsr   r'   r  r,   rI   rJ   r   )partial_examplesrL   r/  r=   rJ  r8   r8   r>   ,test_debug_data_trainable_lemmatizer_partial  s   "r8  c               	   C   sx   ddg difddg difg} t  }g }| D ]}|t||d |d  qt|dg|d	}|d
 dks:J d S )Nr$  r%  )nor9  r9  r9  r)  )r9  r9  r9  r   r6   r-  Tn_low_cardinality_lemmasr   r6  )low_cardinality_examplesrL   r/  r=   rJ  r8   r8   r>   4test_debug_data_trainable_lemmatizer_low_cardinality  s   "r<  c               	   C   sh   di fdi fg} t  }g }| D ]}|t||d |d  qt|dg|d}|d dks2J d S )	Nr$  r)  r   r6   r-  Tno_lemma_annotationsr   r6  )unannotated_examplesrL   r/  r=   rJ  r8   r8   r>   2test_debug_data_trainable_lemmatizer_not_annotated  s   "r?  c                  C   s   ddl m}  ddlm}  d S )Nr   project_run)	spacy.clirA  spacy.cli.project.runr@  r8   r8   r>   test_project_api_imports*  s   rD  c                 C   s^   |  tddd  tjddd tt tjddd W d   dS 1 s(w   Y  dS )	zmTest that we can't tell spacy download to get an arbitrary model by using a
    relative path in the filenamerun_commandc                 S   s   d S r#  r8   )cmdr8   r8   r>   r{  3  r|  z5test_download_rejects_relative_urls.<locals>.<lambda>zen_core_web_sm-3.7.1T)directz../en_core_web_sm-3.7.1N)setattrr   downloadr'  r(  r,  )monkeypatchr8   r8   r>   #test_download_rejects_relative_urls/  s
   "rK  )r  r0  collectionsr   pathlibr   typingr   r   r   r   r'  r  clickr   packaging.specifiersr	   	thinc.apir
   r   r   rB  r   r   spacy.cli._utilr   r   r   spacy.cli.applyr   spacy.cli.debug_datar   r   r   r   r   r   r   r   spacy.cli.downloadr   r   spacy.cli.evaluater   spacy.cli.find_thresholdr   spacy.cli.init_configr   r   r    spacy.cli.init_pipeliner!   spacy.cli.packager"   r#   spacy.cli.validater$   spacy.lang.enr%   spacy.lang.nlr&   spacy.languager'   spacy.schemasr(   spacy.tokensr)   r*   spacy.tokens.spanr+   spacy.trainingr,   r-   r.   spacy.training.convertersr/   r0   r1   
spacy.utilr2   r3   r4   r5   utilr7   markissuerG   rN   rf   parametrizer]   r   r   r   r  r	  r  r  r%  r*  r-  r5  rH  rK  rT  rX  r`  rf  rt  r~  slowr  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r#  r0  r8  r<  r?  rD  rK  r8   r8   r8   r>   <module>   s   (



 



";

2D







"	


n