o
    i8                  #   @   s  d dl Z d dlZd dlZd dlmZ d dlmZmZm	Z	m
Z
 d dlmZ d dlZd dlmZ d dlmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZmZ d dlmZmZmZ d dl m!Z! d dl"m#Z# d dl$m%Z%m&Z& d dl'm(Z( d dl)m*Z* ddl+m,Z- ddlm.Z. dddddifdddddifgZ/ddddddifddddddifgZ0dZ1e 2e1d Z3dd  Z4d!d" Z5ej67d#d$d% Z8ej67d&d'd( Z9ej6:d)eeeeeegej67d*d+d, Z;d-Z<ej6:d.d/d0gej67d1d2d3 Z=ej67d4d5d6 Z>ej67d7d8d9 Z?ej6j@d:d;d<d= ZAej6j@d:d;d>d? ZBej6:d@d/d0gdAdB ZCej6:dCd/e4fd0e5fgdDdE ZDej6:d@d/d0gdFdG ZEej6:dCd/e4fd0e5fgdHdI ZFej6jGej6:dJd/dKdLdMdNdOfd/dKdLdLdNdOfd0dKdMdMdNdOfd0dKdMdLdNdOfd/dPdMddQdRddSdSddT	fd0dPdMddQdRddSdSddT	fd/dUe!dKdLdMdNdOdVfd/dUe!dKdLdLdNdOdVfd0dUe!dKdMdMdNdOdVfd0dUe!dKdMdLdNdOdVfd/dWe!dLdXfd0dWe!dMdXfgdYdZ ZHej6:dJd/d[dLdMdNdOfd/d[dLdLdNdOfd0d[dMdMdNdOfd0d[dMdLdNdOfd/d\e!dLdLdLdLdLd]fd0d\e!dMdLdLdLdLd]fgd^d_ ZIej6:dJd/d[dLdMdNdOfd/d[dLdLdNdOfd0d[dMdMdNdOfd0d[dMdLdNdOfd/d\e!dLdLdLdLdLd]fd0d\e!dMdLdLdLdLd]fgd`da ZJdbdc ZKej6:dJd0dUe3d[dMdSdMdddVfd/dUe3d[dLdedMdddVfd/dfe3dLdXfd0dfe3dMdXfd/d\e3dLdLdLdLdLd]fd0d\e3dMdLdLdLdLd]fgdgdh ZLej6:did/e4e/fd0e5e0fgdjdk ZMdldm ZNdndo ZOej6jGej6:dpd0e0dKdMdSdMddfd/e/dKdLdqdMddfd0e0dPdMddQdRddSdSddT	fd/e/dPdMddQdRddSdSddT	fd/e/dWe!dLdXfd0e0dWe!dMdXfd0e0drdMdSdMddfd/e/drdLdqdMddfd0e0drdMdNdLddfd/e/drdLddLddfd0e0d[dMdSdMddfd/e/d[dLdqdMddfd0e0d[dMdNdLddfd/e/d[dLddLddfd0e0dUe!d[dMdSdMdddVfd/e/dUe!d[dLdedMdddVfd/e/dse!dLdXfd0e0dse!dMdXfd/e/dfe!dLdXfd0e0dfe!dMdXfd/e/d\e!dLdLdLdLdLd]fd0e0d\e!dMdLdLdLdLd]fgdtdu ZPdvdw ZQdxdy ZRdzd{ ZSd|d} ZTej6:d~ddgdeUdeVfddZWej6:dddgdeUdeVfddZXdd ZYdd ZZej6:dddgdd Z[dS )    N)assert_almost_equal)Configcompoundingfix_random_seedget_current_ops)msg)util)print_prf_per_typeprint_textcats_auc_per_cat)English)Language)TextCategorizer)single_label_bow_configsingle_label_cnn_configsingle_label_default_config)multi_label_bow_configmulti_label_cnn_configmulti_label_default_config)DEFAULT_TOK2VEC_MODEL)Scorer)DocDocBin)Example)init_nlp   )build_lazy_init_tok2vec)make_tempdirzI'm so happy.cats      ?        )POSITIVENEGATIVEzI'm so angryzI'm angry and confused)ANGRYCONFUSEDHAPPYzI'm confused but happyz?
[model]
@architectures = "test.LazyInitTok2Vec.v1"
width = 96
modelc              	      >   g  t D ]} t| |d |d  q fdd}|S )Nr      c                          S N r*   train_examplesr*   U/home/ubuntu/.local/lib/python3.10/site-packages/spacy/tests/pipeline/test_textcat.pyget_examples:      z4make_get_examples_single_label.<locals>.get_examples)TRAIN_DATA_SINGLE_LABELappendr   	from_dictmake_docnlptr.   r*   r+   r-   make_get_examples_single_label5   
   "r7   c              	      r&   )Nr   r'   c                      r(   r)   r*   r*   r+   r*   r-   r.   E   r/   z3make_get_examples_multi_label.<locals>.get_examples)TRAIN_DATA_MULTI_LABELr1   r   r2   r3   r4   r*   r+   r-   make_get_examples_multi_label@   r8   r:   i  c               	      s  ddg} g d}g d}t d}g }t||D ]\}  fdd| D }|t||d|i qd	d
ddd}|jdd|id
d}| D ]}	||	 qG|j	dd/ |
 }
tdD ]}i }tj|tdddd}|D ]}|j||
d|d qoq^W d   dS 1 sw   Y  dS )zXTest whether adding n-grams in the textcat works even when n > token length of some docs	offensiveinoffensivezThis is an offensive textz!This is the second offensive textinoffr;   r;   r<   enc                       i | ]}|| kqS r*   r*   .0labeltrain_instancer*   r-   
<dictcomp>Y       z"test_issue3611.<locals>.<dictcomp>r   spacy.TextCatBOW.v1Tr   F@architecturesexclusive_classes
ngram_sizeno_output_layertextcatr%   configlastenable         @      @@jt?size皙?examplessgddroplossesN)spacyblankzipr1   r   r2   r3   add_pipe	add_labelselect_pipes
initializeranger   	minibatchr   update)unique_classesx_trainy_trainr5   
train_datatextcat_dictr%   rO   rD   	optimizerir`   batchesbatchr*   rE   r-   test_issue3611K   s4   
"ru   i  c               	      sF  ddg} g d}g d}t d}g }t||D ]\}  fdd| D }|t||d|i qd	d
ddd}|jdd|id
d}| D ]}	||	 qG|j	dd. |
 }
tdD ]}i }tj|tdddd}|D ]}|j||
d|d qoq^W d   n1 sw   Y  |d}|jd dksJ |jd dksJ dS )z.Test whether textcat works fine with empty docr;   r<   r=   r?   r@   c                    rA   r*   r*   rB   rE   r*   r-   rG   ~   rH   z"test_issue4030.<locals>.<dictcomp>r   rI   Tr   FrJ   rO   r%   rP   rS   rU   rV   rW   rX   rY   r[   r\   N r   )ra   rb   rc   r1   r   r2   r3   rd   re   rf   rg   rh   r   ri   r   rj   r   )rk   rl   rm   r5   rn   ro   rp   r%   rO   rD   rq   rr   r`   rs   rt   docr*   rE   r-   test_issue4030p   s:   
	rx   textcat_configi  c                 C   s  d}t  | }g }tdD ]J}td t }d}dddddi}|j||d	d
}t|d D ]}	||	 q/|  |	|}
|
t|
|g |j|
g}||d  qt|dksaJ t }t||d ||d dd t||d ||d dd dS )zWTest that after fixing the random seed, the results of the pipeline are truly identicalrO   rU   r   zUOnce hot, form ping-pong-ball-sized balls of the mixture, each weighing roughly 25 g.r   r   r   )Labe1Label2Label3TrP   r'      decimalr   N)r   from_strrh   r   r   rd   setre   rg   r3   rj   r   r2   r%   predictr1   lenr   r   to_numpy)ry   	componentpipe_cfgresultsrr   r5   ro   annotspiperD   rw   resultopsr*   r*   r-   test_issue5551   s(   
"&r   a  
[paths]
train = "TRAIN_PLACEHOLDER"
raw = null
init_tok2vec = null
vectors = null

[system]
seed = 0
gpu_allocator = null

[nlp]
lang = "en"
pipeline = ["textcat"]
tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
disabled = []
before_creation = null
after_creation = null
after_pipeline_creation = null
batch_size = 1000

[components]

[components.textcat]
factory = "TEXTCAT_PLACEHOLDER"

[corpora]

[corpora.train]
@readers = "spacy.Corpus.v1"
path = ${paths:train}

[corpora.dev]
@readers = "spacy.Corpus.v1"
path = ${paths:train}


[training]
train_corpus = "corpora.train"
dev_corpus = "corpora.dev"
seed = ${system.seed}
gpu_allocator = ${system.gpu_allocator}
frozen_components = []
before_to_disk = null

[pretraining]

[initialize]
vectors = ${paths.vectors}
init_tok2vec = ${paths.init_tok2vec}
vocab_data = null
lookups = null
before_init = null
after_init = null

[initialize.components]

[initialize.components.textcat]
labels = ['label1', 'label2']

[initialize.tokenizer]
component_namerO   textcat_multilabeli  c                 C   sr   dd }t  (}|d }|| td| }|d| }t|}t| W d   dS 1 s2w   Y  dS )z/Test initializing textcat with labels in a listc                 S   sj   t d}|d}ddd|_t|gd }| d}|| W d    d S 1 s.w   Y  d S )Nr@   z	Some textr   r'   )label1label2)docswb)ra   rb   r3   r   r   to_bytesopenwrite)out_filer5   rw   out_datafile_r*   r*   r-   create_data	  s   

"z#test_issue6908.<locals>.create_dataztrain.spacyTEXTCAT_PLACEHOLDERTRAIN_PLACEHOLDERN)r   CONFIG_ISSUE_6908replaceas_posixr   load_config_from_strr   )r   r   tmp_path
train_path
config_strrQ   r*   r*   r-   test_issue6908  s   

"r   ik  c                  C   sD   ddd d} t t|  ddddd d d dd} tt| d	d
d d S )Ng0L}?g7?)LABEL_ALABEL_BLABEL_CgAϢ?g"nN%?g3M'?)prf)r   r   foobar)nametype)r
   r   r	   )scoresr*   r*   r-   test_issue7019  s   


r   i&  c                  C   s   t  } | d}t| }| | | }|dd |D }|||d }||d |jdddd }|t|ks>J d S )NrO   c                 S      g | ]}|j qS r*   )	predicted)rC   egr*   r*   r-   
<listcomp>-      z"test_issue9904.<locals>.<listcomp>r   r   )axis)	r   rd   r7   rg   r   get_lossrepeatpytestapprox)r5   rO   r.   r]   r   lossloss_double_bsr*   r*   r-   test_issue9904%  s   

r   z#Test is flakey when run with others)reasonc                  C   s   t  } | d}|d |   tdD ]}dD ]\}}| |dd|iif qq| d}d|jv s5J |jd dks>J d S )NrO   answerr}   ))aaaar   )bbbbr   )aar   )	bbbbbbbbbr   )aaaaaar'   r   aaa      ?)r   rd   re   rg   rh   rj   r   )r5   rO   rr   ro   r   rw   r*   r*   r-   test_simple_train4  s   

r   c               	      s  t d tj d g } t }g d}|D ](}|D ]#fdd|D  | t|jdgd |g dgd  d f qqt |  t|jdd	}|D ]}|	| qM|
d
d }tdD ]}i } fdd| D }	|j|	||d t |  q`|D ]B}|D ]=t|jdgd |g dgd  d}
fdd|D }||
 |
j D ]\}}|| s|dk sJ q|dksJ qqq}d S )Nr}   )abcc                    s   i | ]	}|t  |kqS r*   )floatrC   letterw2r*   r-   rG   Q  s    z2test_textcat_learns_multilabel.<locals>.<dictcomp>drU   )words   )widthc                   S   s   g S r)   r*   r*   r*   r*   r-   <lambda>W      z0test_textcat_learns_multilabel.<locals>.<lambda>   c                    s    g | ]\}}t |d  iqS r   )r   r2   )rC   rw   catr   r*   r-   r   Z  s     z2test_textcat_learns_multilabel.<locals>.<listcomp>r^   r`   c                    s   i | ]}| |kqS r*   r*   r   r   r*   r-   rG   `  rH   r   )randomseednumpyr   r1   r   vocabshuffler   re   rg   rh   rj   r   items)r   r5   lettersw1rO   r   rq   rr   r`   r]   rw   truthr   scorer*   )r   r   r-   test_textcat_learns_multilabelH  s@   
2
&r   r   c                 C   s   t  }|| }|d tt |d W d    n1 s"w   Y  | dkrGtt |  W d    d S 1 s@w   Y  d S |  d S )Nr   	   rO   )r   rd   re   r   raises
ValueErrorrg   )r   r5   rO   r*   r*   r-   test_label_typesi  s   


"r   zname,get_examplesc                    s`   t  }|| }||  fdd}tt |j|d W d    d S 1 s)w   Y  d S )Nc                     s0     } | d j }t|j d }d|j|< | S )Nr   g       @)	referencelistr   keys)r]   refkeyexample_getterr*   r-   invalid_examples  s
   

z2test_invalid_label_value.<locals>.invalid_examplesr.   r   rd   r   r   r   rg   )r   r.   r5   rO   r   r*   r   r-   test_invalid_label_valuex  s   
"r   c                 C   sH   t  }||  tt |  W d    d S 1 sw   Y  d S r)   r   )r   r5   r*   r*   r-   test_no_label  s
   

"r   c                 C   s$   t  }||  |j||d d S )Nr   )r   rd   rg   )r   r.   r5   r*   r*   r-   test_implicit_label  s   
r   zname,textcat_configrI   TFrU   )rK   rL   rN   rM   zspacy.TextCatEnsemble.v1@   i  r'   )	rK   rL   pretrained_vectorsr   
embed_size
conv_depthwindow_sizerM   dropoutzspacy.TextCatEnsemble.v2)rK   tok2veclinear_modelzspacy.TextCatCNN.v1)rK   r   rL   c                 C   s   t  }d|i}|j| |d}|d |d |  |jddv s&J tt |d W d   dS 1 s<w   Y  dS )	z/The old textcat architectures weren't resizabler%   rQ   r    r!   nOr   NNEUTRALN)	r   rd   re   rg   r%   maybe_get_dimr   r   r   r   ry   r5   pipe_configrO   r*   r*   r-   test_no_resize  s   

"r  zspacy.TextCatBOW.v3zspacy.TextCatReduce.v1)rK   r   rL   use_reduce_firstuse_reduce_lastuse_reduce_maxuse_reduce_meanc                 C   s   t  }d|i}|j| |d}|d |d |jddv s"J |  |jddv s0J |d |jddv s?J d	S )
z+The new textcat architectures are resizabler%   r   r    r!   r   r   r   )rU   NN)r   rd   re   r%   r  rg   r  r*   r*   r-   test_resize  s   


r	  c                    s  t d t }d|i}|j| |d}g  tD ]\}} t||| q|j fddd}|j	
ddv s;J td	D ]}i }	|j ||	d
 q?d}
||
}t|jdks[J |jd }|jd }|d ||
}t|jdkswJ |jd |ksJ |jd |ksJ |jd dksJ td	D ]}i }	|j ||	d
 q||
}t|jdksJ |jd |ksJ |jd |ksJ |jD ]}|j| dksJ qd S )Nr   r%   r   c                      r(   r)   r*   r*   r+   r*   r-   r     r   z*test_resize_same_results.<locals>.<lambda>r   r   r   r}   r   I am happy.r   r    r!   r   rU   r'   )r   r   rd   r0   r1   r   r2   r3   rg   r%   r  rh   rj   r   r   re   )r   ry   r5   r  rO   ro   annotationsrq   rr   r`   	test_textrw   pos_predneg_predr   r*   r+   r-   test_resize_same_results  sB   



r  c                     s~   t  } | d g  tD ]\}} t| || qtt	 | j
 fddd W d    d S 1 s8w   Y  d S )NrO   c                      r(   r)   r*   r*   r+   r*   r-   r   +  r   z.test_error_with_multi_labels.<locals>.<lambda>r   )r   rd   r9   r1   r   r2   r3   r   r   r   rg   )r5   ro   r  r*   r+   r-   test_error_with_multi_labels$  s   
"r  rJ   r}   z#spacy.TextCatParametricAttention.v1c                 C   sH   t  }d|i}|j| |d}|d |d |  |dg d S )Nr%   r   r    r!   zThis is a test.)r   rd   re   rg   r   r  r*   r*   r-   test_tok2vec_lazy_init/  s   

r  zname,get_examples, train_datac           	      C   s   t  }|| }|D ]\}}|d D ]	\}}|| qq
|  |j||d tt |jdd d W d    n1 sDw   Y  tt |j| d W d    d S 1 saw   Y  d S )Nr   r   c                   S   s   d S r)   r*   r*   r*   r*   r-   r   [  r   z*test_initialize_examples.<locals>.<lambda>)	r   rd   getr   re   rg   r   r   	TypeError)	r   r.   rn   r5   rO   ro   r  rD   valuer*   r*   r-   test_initialize_examplesJ  s   
"r  c                     sl  t d t   d} g tD ]\}}t || q jfddd}| j	
ddks5J tdD ]}i } j||d	 q9|d d
k sNJ d} |}|j}|d dks_J |d |d  tddksoJ t 1}	 |	 t|	}
|
|}|j}|d dksJ |d |d  tddksJ W d    n1 sw   Y   }|d dksJ |d dksJ |d dksJ |d dksJ d|v sJ g d}dd  |D }dd  |D }dd  fdd|D D }t||D ]\}}|D ]}t|| || dd qqt||D ]\}}|D ]}t|| || dd q#qd S )Nr   rO   c                      r(   r)   r*   r*   r+   r*   r-   r   i  r   z%test_overfitting_IO.<locals>.<lambda>r   r   r   2   r   {Gz?r
  r    ?r!   r   gMbP?cats_micro_fcats_macro_fcats_macro_auc
cats_scorecats_score_desczJust a sentence.zI like green eggs.r
  z
I eat ham.c                 S   r   r*   r   rC   rw   r*   r*   r-   r     r   z'test_overfitting_IO.<locals>.<listcomp>c                 S   r   r*   r   r  r*   r*   r-   r     r   c                 S   r   r*   r   r  r*   r*   r-   r     r   c                       g | ]} |qS r*   r*   rC   ro   r5   r*   r-   r         r}   r~   )r   r   rd   r0   r1   r   r2   r3   rg   r%   get_dimrh   rj   r   r   r   r   to_diskr   load_model_from_pathevaluater   rc   r   )rO   ro   r  rq   rr   r`   r  rw   r   tmp_dirnlp2doc2cats2r   textsbatch_cats_1batch_cats_2no_batch_catscats_1cats_2r   r*   r5   r,   r-   test_overfitting_IO`  sX   
 

"
	r3  c                     s&  t d t   d} g tD ]\}}t || q jfddd}| j	
ddks5J tdD ]}i } j||d	 q9|d d
k sNJ d} |}|j}|d dks_J |d dksgJ t )}	 |	 t|	}
|
|}|j}|d dksJ |d dksJ W d    n1 sw   Y   }|d dksJ |d dksJ d|v sJ g d}dd  |D }dd  |D }dd  fdd|D D }t||D ]\}}|D ]}t|| || dd qqt||D ]\}}|D ]}t|| || dd qqd S )Nr   r   c                      r(   r)   r*   r*   r+   r*   r-   r     r   z+test_overfitting_IO_multi.<locals>.<lambda>r   r   rU   d   r   r  zI am confused but happy.r$   r  r#   r  r   r  r  r  c                 S   r   r*   r   r  r*   r*   r-   r     r   z-test_overfitting_IO_multi.<locals>.<listcomp>c                 S   r   r*   r   r  r*   r*   r-   r     r   c                 S   r   r*   r   r  r*   r*   r-   r     r   c                    r   r*   r*   r!  r"  r*   r-   r     r#  r}   r~   )r   r   rd   r9   r1   r   r2   r3   rg   r%   r$  rh   rj   r   r   r%  r   r&  r'  r   rc   r   )rO   ro   r  rq   rr   r`   r  rw   r   r(  r)  r*  r+  r   r,  batch_deps_1batch_deps_2no_batch_depsr0  r1  r   r*   r2  r-   test_overfitting_IO_multi  sT   



	r8  zname,train_data,textcat_config   zspacy.TextCatBOW.v2zspacy.TextCatCNN.v2c                 C   s   d|i}t  }|j| |d}g }|D ]!\}}|t||| |d D ]	\}	}
||	 q)q|	 }t
dD ]}i }|j|||d q<d S )Nr%   r   r   r}   r   )r   rd   r1   r   r2   r3   r  r   re   rg   rh   rj   )r   rn   ry   r  r5   rO   r,   ro   r  rD   r  rq   rr   r`   r*   r*   r-   test_textcat_configs  s   'r:  c                  C   s   t  } | d}t| }|j|ddgdd |jdksJ |jd dks&J | d}t| }tt	 |j|ddgdd W d    n1 sIw   Y  |j|dd	gd
 |jdks^J d|jvseJ d S )NrO   POSNEGlabelspositive_label)r;  r<  r?  r   FICTIONDRAMAr>  )r@  rA  )
r   rd   r7   rg   r>  cfgr:   r   r   r  )r5   rO   r.   r   r*   r*   r-   test_positive_class   s    


rD  c                  C   s\   t  } | d}t| }tt |j|ddgdd W d    d S 1 s'w   Y  d S )NrO   SOMETHINGr;  r=  )r   rd   r7   r   r   r   rg   r5   rO   r.   r*   r*   r-   test_positive_class_not_present  s   
"rH  c                  C   s\   t  } | d}t| }tt |j|g ddd W d    d S 1 s'w   Y  d S )NrO   )rE  rF  r;  r;  r=  )r   rd   r:   r   r   r   rg   rG  r*   r*   r-   test_positive_class_not_binary  s   

"rI  c                  C   s  g } t  }|d}ddddd|_|d}ddddd|_| t|| |d}ddddd|_|d}ddddd|_| t|| t j| dg dd}|d d	 d
 dks\J |d d	 d dkshJ |d d d
 dkstJ |d d d dksJ |d d d
 dksJ |d d d dksJ |d d d
 dksJ |d d d dksJ |d dksJ |d dksJ d S )Noner   wintersummerspringautumnr   twor   rB  cats_f_per_typerL  r   r   r   rM  r   rN  rO  cats_micro_pg?cats_micro_rgUUUUUU?r   r   r1   r   r   
score_cats)r,   r5   ref1pred1ref2pred2r   r*   r*   r-   test_textcat_evaluation%  s2   
rZ  zmulti_label,spring_p)Tr   )Fr   multi_labelspring_pc           	      C   s   g }t  }|d}ddddd|_|d}ddddd|_|t|| |d}dddd|_|d}ddddd|_|t|| t j|dg d| d	}|d
 d d |ks\J |d
 d d dkshJ dS )z
    multi-label: the missing 'spring' in gold_doc_2 doesn't incur a penalty
    exclusive labels: the missing 'spring' in gold_doc_2 is interpreted as 0.0rJ  r   r   rL  rM  rO  rN  rP  rL  rM  rO  r   rK  )r>  r[  rQ  rN  r   r   NrT  )	r[  r\  r,   r5   rV  rW  rX  rY  r   r*   r*   r-   test_textcat_eval_missingD  s(   r_  zmulti_label,expected_loss)Tr   )Fg      ?expected_lossc                    s   g  t  }|d}ddddd} t|d|i |d}dddd} t|d|i | r8|d}n|d	}t|tsDJ | fd
d |jj	j
g dg dgdd}| |\}	}
|	|ksjJ dS )z
    multi-label: the missing 'spring' in gold_doc_2 doesn't incur an increase in loss
    exclusive labels: the missing 'spring' in gold_doc_2 is interpreted as 0.0 and adds to the loss
    rJ  r   r   r]  r   rP  r^  r   rO   c                      r(   r)   r*   r*   r+   r*   r-   r     r   z#test_textcat_loss.<locals>.<lambda>)r   r   r   r   )r   r   r   r   r   )dtypeN)r   r1   r   r2   rd   
isinstancer   rg   r%   r   asarrayr   )r[  r`  r5   doc1cats1r*  r+  rO   r   r   d_scoresr*   r+   r-   test_textcat_lossf  s$   	
rg  c                     s  t  } | d g  tD ]\}} t| || q| j fddd |  }d|d   kr9dks<J  J | j dd	id
}|d d d dksQJ | j ddid
}|d }|d d d d	ksjJ | j dddd
}|d }|d d d d	ksJ ||ksJ d S )Nr   c                      r(   r)   r*   r*   r+   r*   r-   r     r   z3test_textcat_multilabel_threshold.<locals>.<lambda>r   r   r  r'   	thresholdr   
scorer_cfgrQ  r    r   )rh  r?  	r   rd   r0   r1   r   r2   r3   rg   r'  )r5   ro   r  r   macro_fpos_fr*   r+   r-   !test_textcat_multilabel_threshold  s&   

 
rn  c                     s   t  } | d g  tD ]\}} t| || q| j fddd |  }d|d   kr9dks<J  J | j dd	id
}|d d d dksQJ | j ddid
}|d d d d	ksfJ d S )Nr   c                      r(   r)   r*   r*   r+   r*   r-   r     r   z.test_textcat_multi_threshold.<locals>.<lambda>r   r   r  r'   rh  r   ri  rQ  r    r   rk  )r5   ro   r  r   r*   r+   r-   test_textcat_multi_threshold  s   

 ro  zcomponent_name,scorer)rO   zspacy.textcat_scorer.v1)r   z"spacy.textcat_multilabel_scorer.v1c                    s   t  }|j| dd|iid g  tD ]\}} t||| q|j fddd | }d|d   kr?d	ksBJ  J d
S )zQCheck that legacy scorers are registered and produce the expected score
    keys.scorerz@scorersr   c                      r(   r)   r*   r*   r+   r*   r-   r     r   z-test_textcat_legacy_scorers.<locals>.<lambda>r   r   r  r'   Nrk  )r   rp  r5   ro   r  r   r*   r+   r-   test_textcat_legacy_scorers  s   

$rq  )\r   numpy.randomr   r   numpy.testingr   	thinc.apir   r   r   r   wasabir   ra   r   spacy.cli.evaluater	   r
   spacy.lang.enr   spacy.languager   spacy.pipeliner   spacy.pipeline.textcatr   r   r   !spacy.pipeline.textcat_multilabelr   r   r   spacy.pipeline.tok2vecr   spacy.scorerr   spacy.tokensr   r   spacy.trainingr   spacy.training.initializer   r   r   _r   r0   r9   lazy_init_model_configr   LAZY_INIT_TOK2VEC_MODELr7   r:   markissueru   rx   parametrizer   r   r   r   r   skipr   r   r   r   r   r   slowr  r	  r  r  r  r  r3  r8  r:  rD  rH  rI  rZ  boolr   r_  rg  rn  ro  rq  r*   r*   r*   r-   <module>   s   


$
(
@







 





-

65%
