o
    it                     @   s  d dl Z d dlmZ d dlmZ d dlmZmZmZ ddl	m
Z
 e jdd Zd	d
 Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Ze jdddd d!iidd"iggid#gfd$d%d d&iiggid'gfdd d(iiggdd d!iidd"iggd%d d&iiggd)g d*fdd d+iiggdd,d+iiggd-d.gfgd/d0 Ze jd1d2d3gd4d5 Zd6d7 Ze jd8ed9d:d;d< Ze jd8ed9d=d>d? Ze jd@dAdBge jd1d2d3gdCdD ZdEdF Z dGdH Z!dIdJ Z"dKdL Z#dMdN Z$dOdP Z%dQdR Z&e j'dSdTdU Z(dVdW Z)dXdY Z*dZd[ Z+d\d] Z,d^d_ Z-d`da Z.dbdc Z/ddde Z0dfdg Z1dhdi Z2e jdjdkdldmgfdndogfdpdlgfdqdmgfdrdldogfdsdodmgfgdtdu Z3dvdw Z4dxdy Z5dzd{ Z6d|d} Z7e jd~ddigdlfddigdlfddigdfddigdlfddigdfddigdfddigdfddigdfddigdfddigdfddigdfddigdfddigdfddigdfddigdfddigdfddigdfgdd Z8e j9ddd Z:dd Z;dd Z<dd Z=dd Z>dd Z?dd Z@dd ZAdd ZBdd ZCdS )    N)Mock)Matcher)DocSpanToken   )clean_underscorec                 C   sT   ddiggddiddiggddiggd}t | }| D ]
\}}||| q|S )NORTH
JavaScriptGoogleNowLOWERjavaJS	GoogleNowJava)r   itemsadd)en_vocabrulesmatcherkeypatterns r   X/home/ubuntu/.local/lib/python3.10/site-packages/spacy/tests/matcher/test_matcher_api.pyr   
   s   

r   c                 C   s   t | }ddig}t|dksJ |d|g t|dks J |d d|vs+J |d|g d|v s8J |d\}}t|d sGJ d S )Nr	   testr   Rule   )r   lenr   removeget)r   r   patternon_matchr   r   r   r   test_matcher_from_api_docs   s   

r$   c                 C   sp   t | }t|dksJ t| g dd}tt || W d    n1 s(w   Y  t|jdks6J d S )Nr   )Thisisquite	somethingwords)r   r   r   pytestwarnsUserWarningents)r   r   docr   r   r   !test_matcher_empty_patterns_warns%   s   
r0   c                 C   sx   d}t | |dd}g d}dd |D }dd }t| }|jd	||d
 || |jdks1J |d jdks:J d S )Nu'   Wow 😀 This is really cool! 😂 😂 r)   )u   😀u   😃u   😂u   🤣u   😊u   😍c                 S   s   g | ]}d |igqS )r	   r   ).0emojir   r   r   
<listcomp>2   s    z0test_matcher_from_usage_docs.<locals>.<listcomp>c           
      S   s   || \}}}|j j| dkr| jd7  _||| }| }|| W d    n1 s0w   Y  || }	d|	j |	j _d S )NHAPPYg?happy emoji)vocabstrings	sentiment
retokenizemergetextnorm_)
r   r/   imatchesmatch_idstartendspanretokenizertokenr   r   r   label_sentiment4   s   
z5test_matcher_from_usage_docs.<locals>.label_sentimentr5   r#   r   r   r6   )r   splitr   r   r9   r=   )r   r<   r/   	pos_emojipos_patternsrF   r   r   r   r   test_matcher_from_usage_docs.   s   
rK   c                 C   s@   t | dksJ | dddigg d| v sJ d| vsJ d S )N   TESTr	   r   TEST2)r   r   )r   r   r   r   test_matcher_len_containsE   s   rO   c                 C   s   t | ddgd}ddigddiddigg}t| }t }t| }|d| t||dks0J t| }t }|jd||d t||dksIJ |jdksPJ d S )	Nabr)   TEXTNEW_APIr   NEW_API_CALLBACKrG   )r   r   r   r   r   
call_count)r   r/   r   r   r#   r   r   r   test_matcher_add_new_apiL   s   rV   c                 C   s&   t | jg dd}| |g ksJ d S )N)Ilikecheese.r)   )r   r7   r   r/   r   r   r   test_matcher_no_match[   s   r\   c                 C   s6   t | jg dd}| || jjd ddfgksJ d S )N)r
   r&   goodr)   r   r   r   r   r7   r8   r[   r   r   r   test_matcher_match_start`   s   $r_   c                 C   :   g d}t | j|d}| ||jjd ddfgksJ d S )N)rW   rX   r   r)   r   r   rL   r^   r   r*   r/   r   r   r   test_matcher_match_ende      $rb   c                 C   r`   )N)rW   rX   r   r   bestr)   r   r      r^   ra   r   r   r   test_matcher_match_middlek   rc   rf   c                 C   sJ   g d}t | j|d}| ||jjd ddf|jjd ddfgks#J d S )	N)rW   rX   r   r   andr   rd   r)   r   r   re   r         r^   ra   r   r   r   test_matcher_match_multiq   s   rj   zrules,match_locsr   r	   FUZZYr   r   r   re   r   r   r   rh   ri   r
   r   )rl   rm      	   JavascriptsFUZZY5)ABrn   c                 C   sX   g d}t | |d}t| }| D ]
\}}||| q|dd ||D ks*J d S )N)	TheyrX   Goggler   rg   Javbutnot	JvvaScrptr)   c                 S   s   g | ]	\}}}||fqS r   r   )r2   m_idrA   rB   r   r   r   r4      s    z,test_matcher_match_fuzzy.<locals>.<listcomp>)r   r   r   r   )r   r   
match_locsr*   r/   r   r   r   r   r   r   test_matcher_match_fuzzyz   s   #r}   set_opINNOT_INc                 C   st   dd|ddgiiddggi}t | }| D ]\}}|j||dd qg d	}t| |d
}t||dks8J d S )Nr   rk   r   r   +r	   OPLONGESTgreedyru   rX   rv   Noor)   r   )r   r   r   r   r   )r   r~   r   r   r   r   r*   r/   r   r   r   'test_matcher_match_fuzzy_set_op_longest   s   r   c                 C      ddddgidgdddggi}t | }| D ]\}}|j||d	d
 qg d}t|j|d}|||jjd ddfgksAJ d S )Nr   r   r   r   rv   )rk   r   r   r   r   r   r   r)   rL   re   r   r   r   r   r7   r8   r   r   r   r   r   r*   r/   r   r   r   %test_matcher_match_fuzzy_set_multiple      
r   fuzzynr   
   c                 C   s`   t | }|ddd| diigg dd tddD }t| |}t|||d ks.J d S )	Nr   r	   rk   c                 S   s   g | ]}d d|  qS )r   rP   r   )r2   r>   r   r   r   r4      s    z<test_matcher_match_fuzzyn_all_insertions.<locals>.<listcomp>r   r   r   )r   r   ranger   r   r   r   r   r*   r/   r   r   r   (test_matcher_match_fuzzyn_all_insertions   s
   
r   ri   c                 C   sT   t | }|ddd| diigg g d}t| |}t|||d ks(J d S )Nr   r	   rk   )r   	GoogleNuw
GoogleNuewGoogleNoweee
GiggleNuw3gouggle5Newr   r   r   r   r   r   r   r   r   'test_matcher_match_fuzzyn_various_edits   s
   
r   r   FIRSTr   c           
      C   s   dd|ddgiiddggi}t | }| D ]\}}|j|||d qg d}t|j|d	}||d
d}	t|	dks=J |dkrL|	d jdksJJ d S |	d jdksUJ d S )Nr   FUZZY2r   r   r   r   r   r   r)   Tas_spansr   r   r   z
Goggle Nooz	They like)r   r   r   r   r7   r   r<   )
r   r   r~   r   r   r   r   r*   r/   spansr   r   r   (test_matcher_match_fuzzyn_set_op_longest   s   r   c                 C   r   )Nr   r   r   r   rv   )FUZZY1r   r   r   r   r   r   r)   rL   re   r   r   r   r   r   &test_matcher_match_fuzzyn_set_multiple   r   r   c                 C   s   t | }t|jg dd}|dddii ddigg ||}t|dks'J |d dd	 d
ks3J t | }|dddii gg ||}|d dd	 dksRJ d	S )zBTest matcher allows empty token specs, meaning match on any token.rP   rQ   cr)   A.Cr	   rP   r   r   r   Nr   rL   zA.)r   r   r   r   r7   r   r   )r   r   r/   r?   r   r   r   test_matcher_empty_dict  s   r   c                 C   st   t | }t|jg dd}ddidddddig}|d	|g ||}t|d
ks,J |d d
d  dks8J d S )Nr   r)   r	   rP   Tr   )IS_ALPHAr   r   r   r   r   r   r   )r   r   r/   r"   r?   r   r   r   test_matcher_operator_shadow  s   r   c                 C   s   d  }d  }ddiddddddddig}ddiddiddiddiddig}| d	|g t| j|d
}t| |dksAJ t| j|d
}t| |dksRJ | d	|g t| |dkscJ d S )NHe said , " some words " ...z"He said , " some three words " ...r	   "!Tr   IS_PUNCTr   Quoter)   r   r   )rH   r   r   r7   r   )r   words1words2pattern1pattern2r/   r   r   r   test_matcher_match_zero'  s(   r   c                 C   s^   d  }ddidddddig}t| j} | d|g t| j|d}t| |d	ks-J d S )
Nr   r	   r   *Fr   r   r)   r   )rH   r   r7   r   r   r   )r   r*   r"   r/   r   r   r   test_matcher_match_zero_plus@  s   
r   c                 C   s   t | j}|dddigg t|jddgd}||}t|dks$J ddidddg}| d|g | |}t|d	ks@J d S )
NBasicPhilipper	   Philipper)   r   r   r   KleenePhilipper   )r   r7   r   r   r   )r   controlr/   mr"   r   r   r   test_matcher_match_one_plusI  s   
r   c                    s   t | }|dddiddigg t| g dd  fdd	| D }t|d
ks,J |d dks4J |d dks<J |d dksDJ dS )z;Test that patterns with "any token" {} work with operators.rM   r	   r   r   r   )r   helloworldr)   c                        g | ]\}}} || j qS r   r<   r2   _rA   rB   r/   r   r   r4   Z       z3test_matcher_any_token_operator.<locals>.<listcomp>rL   r   r   z
test hellor   ztest hello worldNr   )r   r   r?   r   r   r   test_matcher_any_token_operatorU  s   r   r   c                 C   s   t | }dd }tjd|dd ddidddiig}|d	|g t| dd
gd}||}t|dks5J t| ddgd}||}t|dksIJ d S )Nc                 S   s
   | j dv S )N)applebananar   rE   r   r   r   <lambda>d  s   
 z2test_matcher_extension_attribute.<locals>.<lambda>is_fruitTgetterforcer	   anr   HAVING_FRUITr   r)   r   aardvarkr   r   r   set_extensionr   r   r   )r   r   get_is_fruitr"   r/   r?   r   r   r    test_matcher_extension_attributea  s   r   c                 C   sz   t | }ddddgiig}|d|g t| g dd}||}t|dks(J t| d	gd}||}t|d
ks;J d S )Nr	   r   r   rP   A_OR_AN)r   rP   r   r)   r   r   r   r   r   r   r"   r/   r?   r   r   r   test_matcher_set_valuep  s   r   c                 C   s   t | }dddgiddddig}|d|g t| g d	d
}||}t|dks,J t| ddgd
}||}t|dks@J d S )Nr   rP   the?r   r	   house	DET_HOUSE)InrP   r   r)   r   myr   r   r   r   r   r   test_matcher_set_value_operator|  s   r   c                 C   s  t | }ddddgiig}|d|g t| g dd}t||dks&J |d	 d t||dks7J |d	 d
 t||dksHJ |d	 d t||dksYJ |d	 d t||dksjJ t | }ddddgiig}|d|g t| g dd}d|d	 _t||dksJ t | }ddg iig}|d|g t| g dd}d|d	 _t||d	ksJ tjdg d t | }dddddgiiig}|d|g t| g dd}dg|d	 j_	ddg|d j_	t||dksJ d S )NMORPH	IS_SUBSETFeat=Val
Feat2=Val2Mr   r)   rL   r   Feat=Val|Feat2=Val2Feat=Val|Feat2=Val2|Feat3=Val3r   )Feat=Val|Feat2=Val2|Feat3=Val3|Feat4=Val4TAGrs   rt   r   extdefaultr   CD
r   r   r   r   	set_morphtag_r   r   r   r   r   r   r"   r/   r   r   r   "test_matcher_subset_value_operator  sB   

r   c                 C   s   t | }ddg diig}|d|g t| g dd}t||dks&J |d d t||dks7J |d d	 t||d
ksHJ |d d t||d
ksYJ t | }ddddgiig}|d|g t| g dd}d|d _t||dksJ t | }dddgiig}|d|g t| g dd}d|d _t||d
ksJ t | }ddg iig}|d|g t| g dd}d|d _t||dksJ tjdg d t | }ddddgiiig}|d|g t| g dd}ddg|d j_	t||d
ksJ d S )Nr   IS_SUPERSETr   r   z
Feat3=Val3r   r   r)   r   r   r   r   r   r   rs   rt   rL   r   r   r   r   r   r   r   r   $test_matcher_superset_value_operator  sH   


r   c                 C   s4  t | }ddg diig}|d|g t| g dd}t||dks&J |d d t||d	ks7J |d d
 t||d	ksHJ |d d t||d	ksYJ |d d t||d	ksjJ t | }ddddgiig}|d|g t| g dd}d|d _t||d	ksJ t | }ddg iig}|d|g t| g dd}d|d _t||dksJ tjdg d t | }dddddgiiig}|d|g t| g dd}ddg|d j_	t||d	ksJ t | }dddddgiiig}|d|g t| g dd}dgdg|d j_	t||dks'J ddg|d j_	t||d	ks:J t | }dddg iiig}|d|g t| g dd}ddg|d j_	t||dksiJ t | }dddddgiiig}|d|g t| g dd}g |d j_	t||dksJ d S )Nr   
INTERSECTSr   r   r   r)   r   r   r   r   r   r   r   rs   rt   r   r   r   r   Abxr   r   r   r   r   %test_matcher_intersect_value_operator  sh   

r   c                 C   s`  t | }dddgiig}dddgiig}|d|g |d|g t| g dd}t||d	ks4J |d	 d t||d
ksEJ |d	 d t||d
ksVJ t | }ddddgiig}ddg diig}|d|g |d|g t| g dd}t||d	ksJ |d	 d t||dksJ |d	 d t||d
ksJ d S )Nr   r   zFeat1=Val1|Feat2=Val2zFeat2=Val2|Feat1=Val1r   Nr   r)   r   r   r   
Feat1=Val1r   )r   z
Feat1=Val3r   zFeat2=Val2,Val3|Feat1=Val1r   zFeat1=Val1,Val3|Feat2=Val2)r   r   r   r   r   )r   r   r   r   r/   r   r   r   test_matcher_morph_handling$  s,   r   c                 C   v   t | }dddiig}|d|g t| g dd}||}t|dks&J t| dgd}||}t|d	ks9J d S )
Nr	   REGEXz(?:a|an)r   r   rP   hir)   r   byer   r   r   r   r   r   test_matcher_regexB     r  c                 C   s~   t | }dddddgiiig}|d|g t| g dd}||}t|d	ks*J t| d
gd}||}t|dks=J d S )Nr	   r   r   (?:a)(?:an)r   r   r)   r   r   r   r   r   r   r   r   test_matcher_regex_set_inN     r  c                 C   s~   t | }dddddgiiig}|d|g t| g dd}||}t|d	ks*J t| d
gd}||}t|d	ks=J d S )Nr	   r   r   r  r  r   r   r)   r   r   r   r   r   r   r   test_matcher_regex_set_not_inZ  r  r  c                 C   r   )
NSHAPEr   z^[^x]+$	NON_ALPHA)99problemsr   r)   r   r   r   r   r   r   r   r   test_matcher_regex_shapef  r  r  zcmp, badz==rP   aaaz!=aaz>=z<=><c                 C   s   t | }d|diig}|d|g t| g dd}||}t|t|t| ks,J t| |d}||}t|dks>J d S )NLENGTHr   LENGTH_COMPARE)rP   r  r  r)   r   r   )r   cmpbadr   r"   r/   r?   r   r   r   test_matcher_compare_lengthr  s   r  c                 C   s   t | }dd }tjd|dd ddddd	giiig}|d
|g t| g dd}||}t|dks6J t| dgd}||}t|dksIJ d S )Nc                 S   s   d t| jS )N )joinreversedr<   r   r   r   r   r     s    z7test_matcher_extension_set_membership.<locals>.<lambda>r  Tr   r   r   eybihREVERSED)r   r   r   r)   r   r   r   r   )r   r   get_reversedr"   r/   r?   r   r   r   %test_matcher_extension_set_membership  s   r  c                 C   s   t | }tjdg d dddddgiiig}|d|g t| g dd	}dd
g|d j_t||dks7J dg|d j_t||dksHJ d|d j_t||dksXJ d S )Nr   r   r   r   rs   r   r   r   r)   rt   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   'test_matcher_extension_in_set_predicate  s   r  c                 C   sT   t | }ddiddig}tt |d| W d    d S 1 s#w   Y  d S )NrR   r   r   rM   )r   r+   raises
ValueErrorr   )r   r   r"   r   r   r   test_matcher_basic_check  s
   "r!  c              	   C   s  t | dgd}d|d _t | dgd}d|d _d|d _|d d d|d _t | dgd}t| }|d	d
digg || t	t
 || W d    n1 sVw   Y  t	t
 || W d    n1 sow   Y  ||dd ||dd dD ]F}t| }|d	|digg || t	t
 || W d    n1 sw   Y  t	t
 || W d    n1 sw   Y  qt| }|d	ddigg || || || t| }|d	ddigg || || || d S )NTestr)   ROOTr   r   Xr   LEMMArM   DEPrP   T)allow_missing)r   POSr%  r	   rR   )r   dep_r   pos_r   lemma_r   r   r+   r  r   )r   doc1doc2doc3r   attrr   r   r   test_attr_pipeline_checks  sP   







r0  zpattern,textr   TIS_ASCIIIS_DIGIT1IS_LOWERIS_UPPERrs   IS_TITLEAaaar   rZ   IS_SPACE

IS_BRACKET[IS_QUOTEr   IS_LEFT_PUNCTz``IS_RIGHT_PUNCTz''IS_STOPr   SPACYLIKE_NUMLIKE_URLzhttp://example.com
LIKE_EMAILzmail@example.comc                 C   sT   t | }t| |dd}|d|g t|dksJ ||}t|dks(J d S )Nr1   r)   r   r   )r   r   rH   r   r   )r   r"   r<   r   r/   r?   r   r   r   $test_matcher_schema_token_attributes  s   rD  zignore:\[W036c                 C   s`   t | }tt |jdddiggg d W d   n1 s w   Y  |t| dgd dS )z0Test that on_match can only be None or callable.rM   rR   r   rG   Nr)   )r   r+   r  r   r   r   )r   r   r   r   r   test_matcher_valid_callback  s
   rE  c                 C   sV   t  }t| }ddig}|jd|g|d t| g dd}||}|||d| d S )Nr	   r   r   rG   r%   r&   rP   r   rZ   r)   r   r   r   r   r   assert_called_once_withr   mockr   r"   r/   r?   r   r   r   test_matcher_callback  s   
rK  c                 C   sZ   t  }t| }ddig}|jd|g|d t| g dd}||dd}|||d	| d S )
Nr	   r   r   rG   rF  r)   T)with_alignmentsr   rG  rI  r   r   r   %test_matcher_callback_with_alignments  s   
rM  c                 C   sn   d}t | j| d}|d d }|dd  }t| |dks!J t| |dks+J t| |dks5J d S )N%JavaScript is good but Java is betterr)   rL   re   r   r   )r   r7   rH   r   )r   r<   r/   span_js	span_javar   r   r   test_matcher_span  s   rQ  c                 C   s   d}t | j| d}| |dd}t|dksJ t|d ts"J |d jdks+J |d jdks4J t|d	 ts=J |d	 jd
ksFJ |d	 jd
ksOJ | |d	d dd}t|d	ksaJ t|d tsjJ |d jd
kssJ |d jd
ks|J dS )zTest the new as_spans=True API.rN  r)   Tr   r   r   r
   r   r   r   N)r   r7   rH   r   
isinstancer   r<   label_)r   r<   r/   r?   r   r   r   test_matcher_as_spans"  s   rT  c                 C   sv   t | jddgd}tt#}| |gD ]}q|jsJ dt|jd jv s)J W d    d S 1 s4w   Y  d S )Nr   r   r)   z
spaCy v3.0r   )	r   r7   r+   r,   DeprecationWarningpipeliststrmessage)r   r/   recordr   r   r   r   test_matcher_deprecated6  s   
"r[  c                 C   sn   t | }ddig}|d|g t| g dd}||}t|dks$J d|v s*J |d d|vs5J d S )Nr   r   r   rF  r)   r   )r   r   r   r   r    r   r   r   r   !test_matcher_remove_zero_operator?  s   

r\  c                 C   sL   t | ddgddgd}t| }|dddd	gg t||d
ks$J d S )NrP   rQ   rs   rt   )r*   tagsrM   r   r   )r   r   r   r   r   r   r   )r   r/   r   r   r   r   test_matcher_no_zero_lengthK  s   r_  c                    sj  t | }|dddigg t| g ddtdddd	td
ddd	g_t| g ddtdd
dd	g_fdd|D }fdd|D }t|dksWJ |d dks_J t|dksgJ t | }|ddddgg t| g dd t dddd	g _ fdd| D }t|dksJ |d dksJ |d dksJ |d dksJ dS )z0Test that patterns with ent_iob works correctly.r   ENT_IOBrW   )rW   visitedNewYorkrg   
Californiar)   r   re   GPE)labelrh   ri   )rW   ra  r   friendAliciaPERSONc                    r   r   r   r   )r,  r   r   r4   Z  r   z,test_matcher_ent_iob_key.<locals>.<listcomp>c                    r   r   r   r   )r-  r   r   r4   [  r   r   r   rc  r   )r`  r   )rW   ra  r   rg  AnnaMaria	Esperanza   c                    r   r   r   r   r   r   r   r4   f  r   rL   rk  zMaria Esperanzarl  N)r   r   r   r   r.   r   )r   r   matches1matches2r?   r   )r/   r,  r-  r   test_matcher_ent_iob_keyR  s,   "rp  c                    s4  t | g dd t| }dddg}|d|g  fdd| D }t|d	ks,J t| }dd
dg}|d|g  fdd| D }t|dksPJ t| }dddg}|d|g  fdd| D }t|dkstJ t| }dddg}|d|g  fdd| D }t|dksJ d S )N)
foobarrq  rq  rr  rq  rq  rq  rr  rr  r)   rq  z{3}r   rM   c                    r   r   r   r   r   r   r   r4   w  r   z1test_matcher_min_max_operator.<locals>.<listcomp>r   z{2,}c                    r   r   r   r   r   r   r   r4   ~  r   re   z{,2}c                    r   r   r   r   r   r   r   r4     r   rp   z{2,3}c                    r   r   r   r   r   r   r   r4     r   r^  )r   r   r"   rn  ro  matches3matches4r   r   r   test_matcher_min_max_operatorm  s0   ru  )Dr+   rJ  r   spacy.matcherr   spacy.tokensr   r   r   doc.test_underscorer   fixturer   r$   r0   rK   rO   rV   r\   r_   rb   rf   rj   markparametrizer}   r   r   r   r   r   r   r   r   r   r   r   r   r   usefixturesr   r   r   r   r   r   r   r  r  r  r  r  r  r  r!  r0  rD  filterwarningsrE  rK  rM  rQ  rT  r[  r\  r_  rp  ru  r   r   r   r   <module>   s    
		
"


	

	

*/C



*

	



	