o
    i                     @   s  d dl Z d dlmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d d	lmZmZmZ d
ZddedgiifddeddgiifgZddedgiifddeg diifddeg iifgZefddZe jdg dg dg dfg dg dg dfg dg dg dfg dg dg dfe dd gg dd!d"ge g d#g dg d$e d%d&gg dd"d'ggd(d) Zd*d+ Zd,d- Ze jd.g d/d0d1 Zd2d3 ZdS )4    N)Config)util)English)Language)span_finder_default_config)Doc)Example)fix_random_seedmake_tempdirregistrypytestzWho is Shaka Khan?spans)      zI like London and Berlin.r            I like London and Berlin)r   r   )r   r    c                 C   s6   g }|D ]}t | |d |d }|| q|S )Nr      )r   	from_dictmake_docappend)nlpdatatrain_examplesteg r    Y/home/ubuntu/.local/lib/python3.10/site-packages/spacy/tests/pipeline/test_span_finder.pymake_examples   s
   r"   z4tokens_predicted, tokens_reference, reference_truths)Mon.-June16)Mon.r%   r&   r'   )r   r   r)   r)   r   r   r)   )r(   r%   Juner'   )r)   r)   r   r   r   r   r)   )r(   r%   r&   16)r(   z-Junze 16)r)   r)   r)   r)   z	Mon.-Juner'   r.   r)   )Mon.-r&   r'   )r)   r*   r)   r2   zJune 16r-   c                 C   s  t  }t|j| dgt|  d}t|j|dgt| d}t||}|jddg|jjt< |j	ddtid}|
  |jj}|j|jkrftjtdd	 ||g| W d    d S 1 s_w   Y  d S ||g|\}	}
t|	t| ksyJ |jj|	|j| d S )
NF)wordsspaces   	   span_finder	spans_keyconfigz*must match between reference and predictedmatch)r   r   vocablenr   	reference	char_spanr   	SPANS_KEYadd_pipe
initializemodelopstextr   raises
ValueError_get_aligned_truth_scoresxptestingassert_array_equalasarray)tokens_predictedtokens_referencereference_truthsr   	predictedr?   exampler7   rE   truth_scoresmasksr    r    r!   test_loss_alignment_example'   s0   )

rU   c                  C   s   t  } | d| dg}|d dd g|d jt< |d dd g|d jt< d}|D ]}|t|7 }q+t t }t	|d }|j
|d	 ||}t||ksVJ t|d d
ks`J d S )NThis is an example.This is the second example.r         r   r5   rD   )X   )r   r   rA   r>   r   from_strr   interpolater   resolverC   predict)r   docstotal_tokensdocr:   rD   predictionsr    r    r!   test_span_finder_modelg   s   
rd   c                  C   s   t  } | d| dg}|d dd g|d jt< |d dd g|d jt< | jdd	tid
}|   t||}t|d jv sDJ d S )NrV   rW   r   rX   rY   r   r5   r7   r8   r9   )r   r   rA   rB   rC   listpipe)r   r`   r7   r    r    r!   test_span_finder_component|   s   rg   z"min_length, max_length, span_count))r   r   r   )NN   )r[   N   )Nr   r[   )r[   rX   r[   c                    s  t  }|d}dkr4 dkr4tjtdd |jd tdd}W d    d S 1 s-w   Y  d S |jd tdd}|  g d}||g| |jt sUJ t	|jt |ks`J  d u rht
d	 d u rnd
t fdd|jt D sJ d S )Nz1Me and Jenny goes together like peas and carrots.r   z"Both 'min_length' and 'max_length'r;   r7   )
max_length
min_lengthr8   r9   )
r-   r)   r*   r)   r)   r)   r*   r)   r.   r)   infr   c                 3   s,    | ]}t |  ko kn  V  qd S N)r>   .0spanrj   rk   r    r!   	<genexpr>   s   * z4test_set_annotations_span_lengths.<locals>.<genexpr>)r   r   rG   rH   rB   rA   rC   set_annotationsr   r>   floatall)rk   rj   
span_countr   rb   r7   scoresr    rq   r!   !test_set_annotations_span_lengths   s@   
		&rx   c                     s  t d t } | jddtid}t|  | j fddd}|jdd	ks(J td
D ]}i }| j	 ||d q,|d dk sAJ d}| |}|j
t }t|dksTJ tdd |D h dkscJ t 2}| | t|}	|	|}
|
j
t }t|dksJ tdd |D h dksJ W d    n1 sw   Y  |  }dt d|v sJ |dt d dksJ |dt d dksJ | d}t|j
t dksJ d S )Nr   r7   r8   r9   c                      s    S rm   r    r    r   r    r!   <lambda>   s    z%test_overfitting_IO.<locals>.<lambda>)get_examplesnOr[   2   )sgdlossesgMbP?r   rX   c                 S      g | ]}|j qS r    rF   rn   r    r    r!   
<listcomp>       z'test_overfitting_IO.<locals>.<listcomp>>   London and BerlinBerlinLondonc                 S   r   r    r   rn   r    r    r!   r      r   spans__f_pg      ?_rg      ?r   r   )r	   r   rB   rA   r"   rC   rD   get_dimrangeupdater   r>   setr
   to_diskr   load_model_from_pathevaluate)r   r7   	optimizerir   	test_textrb   r   tmp_dirnlp2doc2spans2rw   r    ry   r!   test_overfitting_IO   s:   



 
r   )r   	thinc.apir   spacyr   spacy.lang.enr   spacy.languager   spacy.pipeline.span_finderr   spacy.tokensr   spacy.trainingr   
spacy.utilr	   r
   r   rA   
TRAIN_DATATRAIN_DATA_OVERLAPPINGr"   markparametrizeparamrU   rd   rg   rx   r   r    r    r    r!   <module>   s    	

(
3