o
    iN                     @   s  d dl Z d dlmZ d dlmZ d dlmZmZ d dlm	Z	m
Z
 d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ ddlmZmZ dg dg ddfdg dg ddfgZdg dg ddfdg dg ddfgZdg dg ddfgZdZe jdd Z e jdd Z!d d! Z"e j#$d"d#d$ Z%e j#$d%d&d' Z&e j#$d%d(d) Z'e j#$d*e j#j(d+d,d-d. Z)d/d0 Z*e j#j+d1d,e j#,d2d3ggd4d5 Z-e j#j+d1d,d6d7 Z.d8d9 Z/d:d; Z0e j#j+d1d,d<d= Z1d>d? Z2d@dA Z3e j#,dBddCgdDdE Z4e j#,dBddCgdFdG Z5e j#j6e j#,dBddCge j#,dHdIeddJdKddLdMdNeddJdKddLdMgdOdP Z7dQdR Z8dSdT Z9dS )U    N)assert_equal)Adam)registryutil)DEPNORM)English)DependencyParser)DEFAULT_PARSER_MODEL)DEFAULT_TOK2VEC_MODEL)Doc)Example)Vocab   )apply_transition_sequencemake_tempdirz&They trade mortgage-backed securities.)   r      r      r   r   )nsubjROOTcompoundpunctnmoddobjr   headsdepszI like London and Berlin.)r   r   r   r   r   r   )r   r   r   ccconjr   )r   r   r   r   r   r   )r   r   r   r   r   r   I like London.)r   r   r   N)r   r   r   Ng?c                   C   s   t tdd idS )Nc                 S   s   | S N )sr"   r"   Q/home/ubuntu/.local/lib/python3.10/site-packages/spacy/tests/parser/test_parse.py<lambda>C       zvocab.<locals>.<lambda>)lex_attr_getters)r   r   r"   r"   r"   r$   vocabA   s   r(   c                    s   | j d dti}tj|ddd }t| | d jd< d jd<  d	   fd
d t	d}t
dD ]"}i }t| g dd}t|g dg dd} j|g||d q8 S )Nr   modelTvalidater   token_vector_width    hidden_widthleftc                      
   t  gS r!   _parser_exampler"   parserr"   r$   r%   P      
 zparser.<locals>.<lambda>gMbP?
   abcdwordsr   r      r?   )r/   r   r/   r   r   sgdlosses)stringsaddr
   r   resolver	   cfg	add_label
initializer   ranger   r   	from_dictupdate)r(   rF   r)   rA   irB   docexampler"   r3   r$   r4   F   s"   



r4   c                 C   s0   t | jg dd}g dg dd}t||S )Nr7   r<   r>   )rightr   r/   r   r   )r   r(   r   rJ   )r4   rM   goldr"   r"   r$   r2   ]   s   r2   i
  c                 C   sD   g d}g d}dgt | }t| |||d}|d jdu s J dS )zATest that deprojectivization doesn't mess up sentence boundaries.)Whenwewriteorcommunicate	virtually,rR   canhideourtruefeelings.)r   r   	   r   r   r   r^   r^   r^   r^      r_   r^   r^   depr=   r   r   r   FN)lenr   is_sent_start)en_vocabr=   r   r   rM   r"   r"   r$   test_issue2772c   s
   re   i  c                     sn   ddi} t jdtiddd }tt |fi |   d d jvs%J   fdd	 d jvs5J d
S )zBTest that the parser doesn't have subtok label if not learn_tokenslearn_tokensFr)   Tr*   r   subtokc                      r0   r!   r1   r"   r3   r"   r$   r%   {   r5   z*test_issue3830_no_subtok.<locals>.<lambda>Nr   rE   r
   r	   r   rG   labelsrH   configr)   r"   r3   r$   test_issue3830_no_subtokq      
rl   c                     sn   ddi} t jdtiddd }tt |fi |   d d jvs%J   fdd d jv s5J d	S )
zATest that the parser does have subtok label if learn_tokens=True.rf   Tr)   r*   r   rg   c                      r0   r!   r1   r"   r3   r"   r$   r%      r5   z,test_issue3830_with_subtok.<locals>.<lambda>Nrh   rj   r"   r3   r$   test_issue3830_with_subtok   rm   rn   i$  zNot fixed yet)reasonc                 C   s:   t | jg dd}d|d _| |}|d jdksJ d S )Nr7   r<   Fr   )r   r(   rc   )r4   rM   r"   r"   r$   test_partial_annotation   s   
rp   c                 C   sJ   g d}g d}g d}t | |||d}|D ]}|jdks"J |jqd S )N)rL   dozn'thaveother
assistance)r?   r?   r?   r?   r   r?   )r   auxnegr   amodr   ra   r   )r   r`   text)rd   r=   r   r   rM   tr"   r"   r$   test_parser_root   s   rz   z=The step_through API was removed (but should be brought back)r=   Helloc                 C   sd   t | |dgdgd}t|dksJ ||}W d    n1 s"w   Y  |d jdks0J d S )Nr   r   ra   r   )r   rb   step_throughr`   )rd   	en_parserr=   rM   _r"   r"   r$   #test_parser_parse_one_word_sentence   s   r   c                 C   s|   g d}g d}t | |d}t||| |d jjdksJ |d jjdks(J |d jjdks2J |d jjdks<J d S )N)Iatethepizzawith	anchoviesr]   )L-nsubjSzL-detr<   r   r   r   r?   )r   r   headrL   )rd   r}   r=   
transitionrM   r"   r"   r$   test_parser_initial   s   r   c                 C   s   g d}g d}dgt | }t| |||d}t t|d jdks$J t t|d jdks1J t t|d jdks>J t t|d jdksKJ t t|d jd	ksXJ t t|d jdkseJ t t|d jd
ksrJ d S )N)Thefourwheelsonr   busturnedquickly)r   r      r   r   r?   r   r   r`   ra   r   r   r?   r   r   r   )rb   r   listleftsrightschildrensubtree)rd   r}   r=   r   r   rM   r"   r"   r$   test_parser_parse_subtrees   s   r   c                 C   s   g d}g d}g d}g d}t | ||||d}| }|jD ]}|j|d|jid q!W d    n1 s8w   Y  |d jd	ksFJ |d
 jdksOJ |d jdksXJ |d jdksaJ d S )N)Aphraser   anotherr   occurs)r   r   r   r   r   r   )detr   prepr   pobjr   )DETNOUNADPr   r   VERB)r=   r   r   poslemma)attrsr   zA phraser   r   r   zanother phraser?   r   )r   
retokenizenoun_chunksmergelemma_rx   )rd   r=   r   r   r   rM   retokenizernpr"   r"   r$   test_parser_merge_pp   s   

r   c                 C   s@  g d}g d}t | |d}t||| |d jdksJ |d jdks&J |d jjdks0J |d jjdks:J |d jjdksDJ |d jdksMJ |d jdksVJ |d jjdks`J |d jjdksjJ |d jjdkstJ |d jdks}J |d jdksJ |d jjdksJ |d jjdksJ |d jjdksJ |d jdksJ |d jdksJ |d jjdksJ |d jjdksJ |d jjdksJ |d jdksJ |d jdksJ |d jjdksJ |d jjdksJ |d jjdksJ g d	}t | |d}t||| |d jdksJ |d jdks)J |d jjdks4J |d jjdks?J |d jjdksJJ |d jdksTJ |d jdks^J |d jjdksiJ |d jjdkstJ |d jjdksJ |d jdksJ |d jdksJ |d jjdksJ |d jjdksJ |d jjdksJ |d jdksJ |d jdksJ |d jjdksJ |d jjdksJ |d jjdksJ |d jdksJ |d jdksJ |d jjdksJ |d jjdksJ |d jjdksJ d S )
N)r8   r9   r:   r;   e)R-nsubjDr   r   r   zR-ROOTr<   r   r   r   r   r?   )r   r   r   r   r   r   r   )r   r   n_leftsn_rights	left_edgerL   
right_edger   )rd   r}   r=   r   tokensr"   r"   r$   $test_parser_arc_eager_finalize_state   sr   r   c                 C   s   g d}g d}g d}t | |||d}tt|D ]}|dks$|dkr.|| jdu s-J q|| jdu s7J q|jD ]}|D ]	}|j|v sHJ q?q;d S )	N)&EinSatzr]   u	   AußerdemistZimmerdavonu
   überzeugtrW   dassauchzepige-
netischeMechanismeneineRollespielenrW   alsou	   VorgängerW   dier   sichdarauf	auswirkenrW   welcheGene	abgelesenwerdenundr   r   nichtr]   r   )&r   r   r      r   r      r   r         r      r   r      r   r   r      r   r         r   r   r   r         r      r   r-   r   r   r   $   )&nkr   r   mor   sboppdr   cpr   r    r   r   r   oarer   r   appr   r   r   r   r   rcr   r   r   ocr   cdr   r   ngr   r   )r=   r   r   r   r?   TF)r   rI   rb   rc   sentsr   )rd   r=   r   r   rM   rL   senttokenr"   r"   r$   test_parser_set_sent_starts%  s   
r   c                 C   sF   dddd}dt i}tj|ddd }t| |fi | t| | d S )NFr   d   )rf   min_action_frequpdate_with_oracle_cut_sizer)   Tr*   )r
   r   rE   r	   )rd   rk   rF   r)   r"   r"   r$   test_parser_constructor6  s   r   	pipe_namebeam_parserc                    s   t  }|| }g  tD ]"\}} t||| |dg D ]}|d ur-|| q"q|j	 fddd}t
dD ]}i }|j ||d q=||  dk sRJ d}	||	}
|
d	 jd
ksaJ |
d jdksjJ |
d	 jjdkstJ |
d jjdks~J d S )Nr   c                      s    S r!   r"   r"   train_examplesr"   r$   r%   M  r&   z&test_incomplete_data.<locals>.<lambda>)get_examples   r@   -C6?I like securities.r   r   r   r   r   )r   add_pipePARTIAL_DATAappendr   rJ   make_docgetrG   rH   rI   rK   dep_r   rL   )r   nlpr4   rx   annotationsr`   	optimizerrL   rB   	test_textrM   r"   r   r$   test_incomplete_dataB  s*   

r  c                    s&  t    | }g }tD ]\}}|t || |dg D ]}|| q"q 	 }t
dD ]}i } j|||d q3||  dk sHJ d}	 |	}
|
d jdksWJ |
d jd	ks`J |
d
 jdksiJ |
d jjdkssJ |
d jjdks}J |
d
 jjdksJ t O} | t|}||	}|d jdksJ |d jd	ksJ |d
 jdksJ |d jjdksJ |d jjdksJ |d
 jjdksJ W d    n1 sw   Y  g d}dd  |D }dd  |D }dd  fdd|D D }t|| t|| d S )Nr      r@   r   r   r   r   r   r   r?   r   r   )zJust a sentence.z$Then one more sentence about London.zHere is another one.r    c                 S      g | ]}| tgqS r"   to_arrayr   .0rM   r"   r"   r$   
<listcomp>      z'test_overfitting_IO.<locals>.<listcomp>c                 S   r  r"   r  r	  r"   r"   r$   r    r  c                 S   r  r"   r  r	  r"   r"   r$   r    r  c                    s   g | ]} |qS r"   r"   )r
  rx   r   r"   r$   r    s    )r   r   
TRAIN_DATAr   r   rJ   r   r   rG   rH   rI   rK   r   r   rL   r   to_diskr   load_model_from_pathpiper   )r   r4   r   rx   r  r`   r  rL   rB   r  rM   tmp_dirnlp2doc2textsbatch_deps_1batch_deps_2no_batch_depsr"   r  r$   test_overfitting_IO\  sL   



r  parser_configzspacy.TransitionBasedParser.v1F@   T)z@architecturestok2vec
state_typeextra_state_tokensr.   maxout_pieces	use_upperzspacy.TransitionBasedParser.v2c                 C   s   d|i}t  }|j| |d}g }tD ]\}}|t||| |dg D ]}|| q(q|	 }	t
dD ]}
i }|j||	|d q9d S )Nr)   rk   r   r   r@   )r   r   r  r   r   rJ   r   r   rG   rH   rI   rK   )r   r  pipe_configr   r4   r   rx   r  r`   r  rL   rB   r"   r"   r$   test_parser_configs  s   r#  c                  C   s^  d} d}t  }| |d}|jd|d}g }tD ]\}}|t||| |dg D ]}|| q-q|	 }	t
dD ]}
i }|j||	|d q>d	}||}|g}||}||\}}t
t|D ]E}|jD ]}|d
 ||f }d
t |  krdt ksJ  J qlt
t|D ]}
|d
 ||
f }d
t |  krdt ksJ  J qqgd S )Nr   r   
beam_widthbeam_densityr   r!  r   r6   r@   r   r   r   )r   r   CONFLICTING_DATAr   r   rJ   r   r   rG   rH   rI   rK   predictscored_parsesrb   ri   eps)r%  r&  r   rk   r4   r   rx   r  r`   r  rL   rB   r  rM   docsbeamshead_scoreslabel_scoresjlabellabel_score
head_scorer"   r"   r$   test_beam_parser_scores  s>   


&&r3  c                  C   s:  t  } d}d}||d}| jd|d}g }tD ]\}}|t| || |dg D ]}|| q-q| 	 }	t
dD ]}
i }| j||	|d q>|d dk sSJ d	}| |g}||}||\}}|d
 }|d
 }|d tjdtdks|J |d tjdtdksJ |d tjdtdksJ |d tjdtdksJ |d tjdtdksJ |d tjdtdksJ |d tjdtdksJ |d tjdtdksJ |d tjdtdksJ |d tjdtdksJ |d tjdtdksJ |d tjdtdksJ |d tjdtdksJ |d tjdtdks(J |d tjdtdks6J |d tjdtdksDJ |d tjdtdksRJ |d tjdtdks`J t .}| | t|}||g}|d}||}||\}}|d
 }|d
 }|d tjdtdksJ |d tjdtdksJ |d tjdtdksJ |d tjdtdksJ |d tjdtdksJ |d tjdtdksJ |d tjdtdksJ |d tjdtdksJ |d tjdtdksJ |d tjdtdksJ |d tjdtdks(J |d tjdtdks6J |d tjdtdksDJ |d tjdtdksRJ |d tjdtdks`J |d tjdtdksnJ |d tjdtdks|J |d tjdtdksJ W d    d S 1 sw   Y  d S ) Nr   r   r$  r   r!  r   r   r@   r   r   )r   r   g      ?)abs)r   r   g        )r   r   )r   r   )r   r   )r   r   )r?   r   )r?   r   )r?   r   )r   r   )r   r   )r   r   )r   r   )r   r   )r   r   )r?   r   )r?   r   )r?   r   )r   r   r  r   r   rJ   r   r   rG   rH   rI   rK   r(  r)  pytestapproxr*  r   r  r   r  get_pipe)r   r%  r&  rk   r4   r   rx   r  r`   r  rL   rB   r  r+  r,  r-  r.  r  r  docs2parser2beams2head_scores2label_scores2r"   r"   r$   test_beam_overfitting_IO  s   





$r=  ):r5  numpy.testingr   	thinc.apir   spacyr   r   spacy.attrsr   r   spacy.lang.enr   spacy.pipeliner	   spacy.pipeline.dep_parserr
   spacy.pipeline.tok2vecr   spacy.tokensr   spacy.trainingr   spacy.vocabr   r   r   r  r'  r   r*  fixturer(   r4   r2   markissuere   rl   rn   xfailrp   rz   skipparametrizer   r   r   r   r   r   r   r  r  slowr#  r3  r=  r"   r"   r"   r$   <module>   s    










	

I

3
&