o
    i(                     @   s  d dl Z d dlmZ d dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d
dg difddg difgZd
dg difdg dg ddfgZdd Zdd Zejdddd Zejdddd Zejdddd Zd d! Zd"d# Zd$d% Z d&d' Z!d(d) Z"ee# e# d*d+ Z$eej#d,d-ej#d,d-d.d/ Z%d0d1 Z&d2d3 Z'dS )4    N)given)util)English)Language)	EditTrees)StringStore)Example)make_tempdirzShe likes green eggslemmas)shelikegreeneggzEat blue ham)eatblueham) r   r   r   zHe hates green eggs)Hehatesr   eggs)r   r   er   r   )wordsr
   c               	      s@  t  } | d}g  tD ]} t| |d |d  q| j fddd t	t
 | jdd d W d    n1 sAw   Y  t	t
 | j fddd W d    n1 s`w   Y  t	t
 | jd	d d W d    n1 s}w   Y  t	t
 | j d W d    d S 1 sw   Y  d S )
Ntrainable_lemmatizerr      c                          S N r   train_examplesr   b/home/ubuntu/.local/lib/python3.10/site-packages/spacy/tests/pipeline/test_edit_tree_lemmatizer.py<lambda>)       z*test_initialize_examples.<locals>.<lambda>get_examplesc                   S   s   d S r   r   r   r   r   r    r!   +   r"   c                      s    d S )Nr   r   r   r   r   r    r!   -   s    c                   S   s   g S r   r   r   r   r   r    r!   /   r"   )r   add_pipe
TRAIN_DATAappendr   	from_dictmake_doc
initializepytestraises	TypeError)nlp
lemmatizertr   r   r    test_initialize_examples"   s$   
""r1   c                     s   t  } | d}d|_g  tD ]} t| |d |d  q| j fddd t  }|d}|j fdd|j	d |j
ddd	d
dksLJ |j	ddddddddddddddd	ddddddddddddddgddkszJ d S )Nr   r   r   c                      r   r   r   r   r   r   r    r!   ;   r"   z-test_initialize_from_labels.<locals>.<lambda>r#   c                      s    d d S )Nr   r   r   r   r   r    r!   C   s    r$   labels      )r   r5         Ss)origsubstl    )
prefix_len
suffix_lenprefix_treesuffix_treer   Er      )treesr3   )r   r%   min_tree_freqr&   r'   r   r(   r)   r*   
label_data
tree2labelr.   r/   r0   nlp2lemmatizer2r   r   r    test_initialize_from_labels4   sR   
"

rI   top_k)r   rA      c              	      s   dddddifdddddifg}t  }|jdd| id	 |d
 g  |D ]} t||d |d  q'tt |j	 fddd W d    d S 1 sUw   Y  d S )NzI'm so happy.catsg      ?g        )POSITIVENEGATIVEzI'm so angryr   rJ   configtextcatr   r   c                      r   r   r   r   r   r   r    r!   y   r"   ztest_no_data.<locals>.<lambda>r#   )
r   r%   r'   r   r(   r)   r+   r,   
ValueErrorr*   )rJ   TEXTCAT_DATAr.   r0   r   r   r    test_no_datai   s   
""rT   c              	      s~  t  }|jdd| id}d|_g  tD ]} t||d |d  q|j fddd}t	d	D ]}i }|j
 ||d
 q4|d dk sIJ d}||}|d jdksXJ |d jdksaJ |jdd  D dd\}	}
| |	\}
}|jjj}||d d dksJ ||d d dksJ ||d d dksJ ||d d dksJ ||d d dksJ d S )Nr   rJ   rO   r   r   c                      r   r   r   r   r   r   r    r!      r"   z&test_incomplete_data.<locals>.<lambda>r#   2   sgdlossesh㈵>She likes blue eggsr   r4   r   c                 S   s   g | ]}|j qS r   )	predicted).0egr   r   r    
<listcomp>   s    z(test_incomplete_data.<locals>.<listcomp>T)is_trainr5   )r   r%   rC   PARTIAL_DATAr'   r   r(   r)   r*   rangeupdatelemma_modelget_lossopsxpcount_nonzero)rJ   r.   r/   r0   	optimizerirX   	test_textdocscores_dXrg   r   r   r    test_incomplete_data|   s.   "
rp   c              	      s\  t  }|jdd| id}d|_g  tD ]} t||d |d  q|j fddd}t	d	D ]}i }|j
 ||d
 q4|d dk sIJ d}||}|d jdksXJ |d jdksaJ |d jdksjJ |d jdkssJ t :}	||	 t|	}
|
|}|d jdksJ |d jdksJ |d jdksJ |d jdksJ W d    n1 sw   Y  | }t  }|jdd| id || ||}|d jdksJ |d jdksJ |d jdksJ |d jdksJ t|}t|}||}|d jdksJ |d jdksJ |d jdks"J |d jdks,J d S )Nr   rJ   rO   r   r   c                      r   r   r   r   r   r   r    r!      r"   z%test_overfitting_IO.<locals>.<lambda>r#   rU   rV   rY   rZ   r   r   r4   r   r5   r   )r   r%   rC   r&   r'   r   r(   r)   r*   ra   rb   rc   r   r	   to_diskload_model_from_pathto_bytes
from_bytespickledumpsloads)rJ   r.   r/   r0   ri   rj   rX   rk   rl   tmp_dirrG   doc2	nlp_bytesnlp3doc3nlp4doc4r   r   r    test_overfitting_IO   sT   "






r   c                  C   sH   t  } | d tt |   W d    d S 1 sw   Y  d S )Nr   )r   r%   r+   r,   rR   r*   )r.   r   r   r    test_lemmatizer_requires_labels   s
   

"r   c               	      s   t  } | d}d|_g  tD ]} t| |d |d  q| j fddd t  }|d}|j fdd|j	d |j
|j
ksHJ |j |j ksTJ d S )	Nr   r   r   c                      r   r   r   r   r   r   r    r!      r"   z,test_lemmatizer_label_data.<locals>.<lambda>r#   c                      r   r   r   r   r   r   r    r!      r"   r2   )r   r%   rC   r&   r'   r   r(   r)   r*   rD   r3   rB   rs   rF   r   r   r    test_lemmatizer_label_data   s   
"
r   c                  C   sN   t  } t| }|dd}||dksJ |dd}||dks%J d S )Ndeeltdelenz*(m 0 3 () (m 0 2 (s '' 'l') (s 'lt' 'n')))gedeeldz3(m 2 3 (s 'ge' '') (m 0 2 (s '' 'l') (s 'ld' 'n'))))r   r   addtree_to_str)stringsrB   treer   r   r    
test_dutch   s   r   c                  C   s   t  } t| }|dd |dd | }t| }|| t|t|ks*J tt|D ]}||||ks>J q0|dd |dd t|t|ksUJ d S )Nr   r   r   )r   r   r   rs   rt   lenra   r   )r   rB   btrees2rj   r   r   r    test_from_to_bytes   s   
r   c                  C   s   t  } t| }|dd |dd t| }t }|d }|| ||}W d    n1 s3w   Y  t|t|ksBJ tt|D ]}||||ksVJ qH|dd |dd t|t|ksmJ d S )Nr   r   r   zedit_trees.bin)	r   r   r   r	   rq   	from_diskr   ra   r   )r   rB   r   temp_dir
trees_filerj   r   r   r    test_from_to_disk  s    
r   c                 C   2   t  }t|}|| |}||| |ksJ d S r   r   r   r   applyformlemmar   rB   r   r   r   r    test_roundtrip+  s   r   ab)alphabetc                 C   r   r   r   r   r   r   r    test_roundtrip_small_alphabet3  s   r   c                  C   sF   t  } t| }|dd}||dd ksJ ||dd ks!J d S )Nr   r   deeldder   )r   rB   tree3r   r   r    test_unapplicable_trees<  s
   r   c                  C   s6   t  } t| }|dd}|dd}||ksJ d S )Nxyzr   )r   r   r   )r   rB   	no_changeemptyr   r   r    test_empty_stringsH  s
   r   )(ru   hypothesis.strategies
strategiesstr+   
hypothesisr   spacyr   spacy.lang.enr   spacy.languager   .spacy.pipeline._edit_tree_internals.edit_treesr   spacy.stringsr   spacy.trainingr   
spacy.utilr	   r&   r`   r1   rI   markparametrizerT   rp   r   r   r   r   r   r   textr   r   r   r   r   r   r   r    <module>   sP    5

#
5

