o
    i                     @   s   d dl Z d dlmZmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ dd Zg dZdg dg ddfdg dg ddfgZdd Zdd Zdd Zdd Zdd Zd d! ZdS )"    N)assert_almost_equalassert_equal)get_current_ops)util)MORPH)English)Language)
Morphology)make_tempdir)Doc)Examplec                  C   sT   t  } | d}|d tt |d W d    d S 1 s#w   Y  d S )NmorphologizerzFeat=A	   )r   add_pipe	add_labelpytestraises
ValueErrornlpr    r   [/home/ubuntu/.local/lib/python3.10/site-packages/spacy/tests/pipeline/test_morphologizer.pytest_label_types   s   

"r   )Feat=NFeat=VFeat=JzI like green eggs)r   r   r   r   )NOUNVERBADJr   )morphsposzEat blue ham)r    r!   )r!   r   r!   c               	      s   t  } | dd}| jddtddd}g  i }tD ]}|| || qtD ]} t| 	|d |d  q)| j
 fd	d
d |jdd  D \}}t }|| |d d }	|| |d d }
t|
|	 d d S )Nr   no_label_smoothinglabel_smoothingg?)r#   )configr      c                          S Nr   r   train_examplesr   r   <lambda>4       z&test_label_smoothing.<locals>.<lambda>get_examplesc                 S      g | ]}|j qS r   )	predicted).0egr   r   r   
<listcomp>6       z(test_label_smoothing.<locals>.<listcomp>g+?)r   r   dictTAGSr   
TRAIN_DATAappendr   	from_dictmake_doc
initializemodelbegin_updater   to_numpyget_lossr   )r   morph_no_lsmorph_lslossestagt
tag_scoresbp_tag_scoresopsno_ls_gradsls_gradsr   r(   r   test_label_smoothing&   s(   
"rI   c                  C   sH   t  } | d tt |   W d    d S 1 sw   Y  d S )Nr   )r   r   r   r   r   r:   r   r   r   r   test_no_label>   s
   

"rK   c               	      sV   t  } | d g  tD ]} t| |d |d  q| j fddd d S )Nr   r   r%   c                      r&   r'   r   r   r(   r   r   r*   K   r+   z%test_implicit_label.<locals>.<lambda>r,   )r   r   r6   r7   r   r8   r9   r:   )r   rC   r   r(   r   test_implicit_labelE   s   
"rL   c                  C   s   t  } | d}|dtj d  |dtj d  |   tt |dtj d  W d    d S 1 s;w   Y  d S )Nr   POSr   r   r   )	r   r   r   r	   	FIELD_SEPr:   r   r   r   r   r   r   r   test_no_resizeN   s   
"rO   c               	      s   t  } | d}|dtj d  g  tD ]} t| 	|d |d  q| 
  | j
 fddd tt | j
d	d d W d    n1 sOw   Y  tt | j
 d W d    d S 1 skw   Y  d S )
Nr   rM   r   r   r%   c                      r&   r'   r   r   r(   r   r   r*   b   r+   z*test_initialize_examples.<locals>.<lambda>r,   c                   S   s   d S r'   r   r   r   r   r   r*   d   r+   )r   r   r   r	   rN   r6   r7   r   r8   r9   r:   r   r   	TypeError)r   r   rC   r   r(   r   test_initialize_examplesY   s   
""rQ   c               	      s  t    d g tD ]} t | d | d  q jfddd}tdD ]}i } j	||d q-|d d	k sBJ d
} |}g d}g d}dd |D |ks[J dd |D |ksfJ t
 ,} | t|}	|	|}
dd |
D |ksJ dd |
D |ksJ W d    n1 sw   Y  g d}dd  |D }dd  |D }dd  fdd|D D }t|| t||  d  d D ]}|jD ]}d|_qqو jfddd}tdD ]}i } j	||d q|d d	k s	J d
} |}g d}g d}dd |D |ks#J dd |D |ks/J g d}t jg d|d}dd |D }d d |D } d}d!|jd"< ||}d#d |D |kseJ d$d |D |ksqJ d%|jd"< d%|jd&< t jd'd(gd)dgd}||}d*d |D d+d,gksJ d!|jd"< d%|jd&< t jd'd(gd)d-gd}||}d.d |D d)d/gksJ d%|jd"< d!|jd&< t jd'd(gd)dgd}||}d0d |D d1d,gksJ  d  d D ]}|jD ]}|jd2krd3|_nd|_|d  qq jfd4dd} djd us'J tdD ]}i } j	||d q+|d d	k sBJ d
} |}g d}g d5}d6d |D |ks\J d7d |D |kshJ d S )8Nr   r   r%   c                      r&   r'   r   r   r(   r   r   r*   p   r+   z%test_overfitting_IO.<locals>.<lambda>r,   2   )sgdrA   gh㈵>zI like blue ham)r   r   r!   r!   )r   r   r   r!   c                 S      g | ]}t |jqS r   strmorphr0   rC   r   r   r   r2   |       z'test_overfitting_IO.<locals>.<listcomp>c                 S   r.   r   pos_rX   r   r   r   r2   }   r3   c                 S   rT   r   rU   rX   r   r   r   r2      rY   c                 S   r.   r   rZ   rX   r   r   r   r2      r3   )zJust a sentence.z$Then one more sentence about London.zHere is another one.zI like London.c                 S      g | ]}| tgqS r   to_arrayr   r0   docr   r   r   r2          c                 S   r\   r   r]   r_   r   r   r   r2      ra   c                 S   r\   r   r]   r_   r   r   r   r2      ra   c                    s   g | ]} |qS r   r   )r0   textrJ   r   r   r2      s    r!   c                      r&   r'   r   r   r(   r   r   r*      r+   )r!   r!   r!   r!   c                 S   rT   r   rU   rX   r   r   r   r2      rY   c                 S   r.   r   rZ   rX   r   r   r   r2      r3   )r   r   _)bluehamlike)wordsr   c                 S   rT   r   rU   rX   r   r   r   r2      rY   c                 S   r.   r   rZ   rX   r   r   r   r2      r3   F	overwritec                 S   rT   r   rU   rX   r   r   r   r2      rY   c                 S   r.   r   rZ   rX   r   r   r   r2      r3   TextendIrf   zFeat=A|That=A|This=Ac                 S   rT   r   rU   rX   r   r   r   r2      rY   zFeat=N|That=A|This=Ar   zThat=Bc                 S   rT   r   rU   rX   r   r   r   r2      rY   zFeat=V|That=Bc                 S   rT   r   rU   rX   r   r   r   r2      rY   r   re   r   c                      r&   r'   r   r   r(   r   r   r*      r+   )r   r   r   r   c                 S   rT   r   rU   rX   r   r   r   r2      rY   c                 S   r.   r   rZ   rX   r   r   r   r2      r3   )r   r   r6   r7   r   r8   r9   r:   rangeupdater
   to_diskr   load_model_from_pathpiper   remove_pipe	referencer[   r   vocabget_pipecfgrb   	set_morphlabels)inst	optimizerirA   	test_textr`   gold_morphsgold_pos_tagstmp_dirnlp2doc2textsbatch_deps_1batch_deps_2no_batch_depsexampletokenr   orig_morphsorig_pos_tagsr   r   )r   r)   r   test_overfitting_IOi   s   
"

















r   )r   numpy.testingr   r   	thinc.apir   spacyr   spacy.attrsr   spacy.lang.enr   spacy.languager   spacy.morphologyr	   spacy.tests.utilr
   spacy.tokensr   spacy.trainingr   r   r5   r6   rI   rK   rL   rO   rQ   r   r   r   r   r   <module>   s4    	