o
    i>)                     @   s   d dl Z d dlmZ d dlZd dlmZ d dlmZ d dlm	Z	m
Z
mZ d dlmZmZ d dlmZmZmZ dd	lmZ edddZejdd Zejdd Zejddd Zdd Zejddd Zdd ZdS )    N)contextmanagerEnglish)contains_cycle)DocDocBinSpan)CorpusExample)create_lower_casing_augmentercreate_orth_variants_augmentermake_whitespace_variant   )make_tempdirroundtrip.spacyc                 c   sL    t  }|| }t| d| |V  W d    d S 1 sw   Y  d S )N)docs)r   r   to_disk)r   nametmpdiroutput_file r   X/home/ubuntu/.local/lib/python3.10/site-packages/spacy/tests/training/test_augmenters.pymake_docbin   s   "r   c                   C   s   t  S )Nr   r   r   r   r   nlp   s   r   c                 C   sH   g d}g d}g d}g d}ddd}t | j||||d}||_|S )	N)
Sarahz'ssisterflewtoSiliconValleyviaLondon.)
NNPPOSNNVBDINr#   r#   r'   r#   r"   )
PROPNPARTNOUNVERBADPr(   r(   r,   r(   PUNCT)
zB-PERSONzI-PERSONO r.   zB-LOCzI-LOCr.   B-GPEr.         ?        )TRAVELBAKING)wordstagsposents)r   vocabcats)r   r5   r6   r7   r8   r:   docr   r   r   r;       s   
r;   zignore::UserWarningc                 C   sx  dgddgddgg ddg}g d}g d}d	gt | }d
|d< d
|d< t| j|||d}tddd|id}t|gd }t||d}t||  W d    n1 sWw   Y  tddd|id}t|gd %}t||d}|| D ]}	|	jD ]}
|
j|
j	 ksJ q|qwW d    n1 sw   Y  t| j|d	gt | d}tddd|id}t|gd *}t||d}|| D ]}	t
|	j|D ]\}}|j|j	 ksJ qqW d    n1 sw   Y  t| j|d	gt | d}tddd|id}t|gd ,}t||d}|| D ]}	t
|	j|D ]\}}|j|jks%J qqW d    d S 1 s5w   Y  d S )NNFP   …...)r6   variants:)-   —   –-----   ——)z

A	Babr=   r>   rA   rB   rC   rD   rE   rF   )_SPr%   rH   r%   r%   r%   r<   r<   r@   r@   r@   r@   r@   r@   TFr   r   )r5   spacesr6   g?      ?single)levellowerorth_variants
   	augmenterr1   )r5   rM   r2   )lenr   r9   r   r   r	   list	referencetextrQ   zip)r   rO   r5   r6   rM   r;   rU   r   readerexampletokenex_token	doc_tokenr   r   r   test_make_orth_variants.   sf   




$r`   c                 C   s  t dd}t|g}t||d}t|| }W d    n1 s!w   Y  |d }|jj|j ks5J |jj|j ks@J dd |jD }dd |jjD |ksUJ t	|jj|jD ]\}}	|j|	j kskJ q]dd |D dd |jD ks}J d	d |jD d	d |D ksJ g d
}
t
| j|
d}t|g}t||d}t|| }W d    n1 sw   Y  |d }|jj|j ksJ |jj|j ksJ dd |jD dd |
D ksJ dd |jD dd | |j D ksJ d S )Nr1   )rP   rT   r   c                 S      g | ]
}|j |j|jfqS r   startendlabel.0er   r   r   
<listcomp>m       z,test_lowercase_augmenter.<locals>.<listcomp>c                 S   ra   r   rb   rf   r   r   r   ri   n   rj   c                 S      g | ]}|j qS r   ent_iobrg   tr   r   r   ri   q       c                 S   rk   r   )pos_rn   r   r   r   ri   r   rp   )rG   rI   zCCC.)r5   c                 S   rk   r   rY   rn   r   r   r   ri   ~   rp   c                 S   s   g | ]}|  qS r   )rQ   rn   r   r   r   ri   ~       c                 S   rk   r   rr   rn   r   r   r   ri      rp   )r   r   r	   rW   rX   rY   rQ   	predictedr8   rZ   r   r9   make_doc)r   r;   rU   r   r[   corpusegr8   ref_entorig_entr5   r   r   r   test_lowercase_augmentere   s6   
"""rz   c           	      C   s   ddt fdd}t|g}t|| d}t|| }W d    n1 s%w   Y  d}d}|d j|ks7J |d jj|ksAJ |d jj|ksKJ |d	 j|ksTJ |d	 jj|ks^J |d	 jj|kshJ d
d |jD }dd |d jjD |ksJ dd |d	 jjD |ksJ d S )NF	randomizec                    s    fdd}|S )Nc                 3   st    |j } rdd |D }n	dd t|D }| }| d|}dd |D |d d< |V  |||V  d S )Nc                 S   s(   g | ]}t   d k r| n| qS )rN   )randomrQ   upper)rg   cr   r   r   ri         ( zftest_custom_data_augmentation.<locals>.create_spongebob_augmenter.<locals>.augment.<locals>.<listcomp>c                 S   s(   g | ]\}}|d  r|  n| qS )r   )rQ   r}   )rg   ir~   r   r   r   ri      r   r/   c                 S   rk   r   rr   rn   r   r   r   ri      rp   token_annotationORTH)rY   	enumerateto_dictru   join	from_dict)r   r\   rY   chexample_dictr;   r{   r   r   augment   s   zRtest_custom_data_augmentation.<locals>.create_spongebob_augmenter.<locals>.augmentr   )r{   r   r   r   r   create_spongebob_augmenter   s   zAtest_custom_data_augmentation.<locals>.create_spongebob_augmenterrT   z4Sarah 's sister flew to Silicon Valley via London . z4SaRaH 's sIsTeR FlEw tO SiLiCoN VaLlEy vIa lOnDoN . r      c                 S   ra   r   rb   rf   r   r   r   ri      rj   z1test_custom_data_augmentation.<locals>.<listcomp>c                 S   ra   r   rb   rf   r   r   r   ri      rj   c                 S   ra   r   rb   rf   r   r   r   ri      rj   )F)boolr   r	   rW   rY   rX   rt   r8   )	r   r;   r   r   r[   rv   	orig_text	augmentedr8   r   r   r   test_custom_data_augmentation   s    "r   c              
   C   s  d}g d}g d}g d}g d}g d}g d}g d}t | j|||||||d	}	|	j|ks2J t| ||	}
t| |
d
d}|jjd jdksLJ t| |
d
d}|jjd jdks^J t| |
d
d}|jjd jdkspJ t| |
d
d}|jjd jdksJ tt	|	d D ]}t| |
d
|}|j| j
sJ dd |jD |d | dg ||d   ksJ dd |jD |d | d
g ||d   ksJ dd |jD |d | dg ||d   ksJ |jdrJ |jdrJ tdd |jD rJ t	t|	jdksJ |dkr"|j| jjdks!J n|j| jj|d ks0J dD ]1}t| |d|}td d |jD rHJ t	t|	jdksTJ |j| jj|d ksbJ q2t	|	jt	|jjksrJ td!d" |jD sJ |jjD ]}|d j
rJ |d# j
rJ qqd$|
jd _t| |
d
d}|j|
jjksJ d%|
jd _|
jdd g|
jjd&< t| |
d
d}|j|
jjksJ |
jjd&= t|	ddd'd(d)g|
j_t| |
d
d}|j|
jjksJ d S )*Nz?They flew to New York City.
Then they drove to Washington, D.C.)Theyr   r   NewYorkCityr"   
Thentheydrover   
Washington,D.C.)TTTTTFFFTTTTFTF)PRPr&   r'   r#   r#   r#   r"   rL   RBr   r&   r'   r#   r   r#   )r   flyr   r   r   r   r"   r   thenr   driver   r   r   r   )r   r   r         r   r   rS   rS   rS   rS   rS         r   )nsubjROOTprepcompoundr   pobjpunctdepadvmodr   r   r   r   r   appos)r.   r/   r.   r0   I-GPEr   r.   r.   r.   r.   r.   r.   r0   r.   r0   )r5   rM   r6   lemmasheadsdepsr8       r   zNew York Cityr   zNew  York Cityr   zNew York  City   r   c                 S   rk   r   )tag_rn   r   r   r   ri      rp   z0test_make_whitespace_variant.<locals>.<listcomp>rL   c                 S   rk   r   )lemma_rn   r   r   r   ri      rp   c                 S   rk   r   )dep_rn   r   r   r   ri      rp   r   r$   MORPHc                 S      g | ]}|j jqS r   headr   rn   r   r   r   ri      rs   r   )r      rS   z		
c                 S   r   r   r   rn   r   r   r   ri      rs   c                 s   s    | ]}|j d kV  qdS )r   Nrl   rn   r   r   r   	<genexpr>   s    z/test_make_whitespace_variant.<locals>.<genexpr>r/   r   spansENTQ123)re   kb_id)r   r9   rY   r
   ru   r   rX   r8   rangerV   is_spacehas_annotationr   rW   sentsr   r   anyr   r   r   )r   rY   r5   rM   r6   r   r   r   r8   r;   r\   mod_exr   jmod_ex2entr   r   r   test_make_whitespace_variant   s~   
222
 
r   )r   )r|   
contextlibr   pytestspacy.lang.enr   (spacy.pipeline._parser_internals.nonprojr   spacy.tokensr   r   r   spacy.trainingr	   r
   spacy.training.augmentr   r   r   utilr   r   fixturer   r;   markfilterwarningsr`   rz   r   r   r   r   r   r   <module>   s*    



6

 