o
    i\?                     @   s|  d dl Z d dlmZmZ d dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ e jedd	d
 Ze jdd Ze jdd Ze jdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Z e j!"d+d,d-gd.d/ Z#d0d1 Z$d2d3 Z%d4d5 Z&d6d7 Z'd8d9 Z(d:d; Z)d<d= Z*d>d? Z+d@dA Z,dBdC Z-dDdE Z.dS )F    N)NumpyOpsget_current_ops)registry)MatchPatternError)make_tempdir)Span)Examplespan_ruler_patternsc                   C   sR   dddddddiddigddd	digd
dddddgdddddddgS )NHELLOhello worldhello1)labelpatternidBYELOWERbyer   r   ORTHhello2COMPLEXfoo*)r   OPTECH_ORGApple	Microsoft r   r   r   X/home/ubuntu/.local/lib/python3.10/site-packages/spacy/tests/pipeline/test_span_ruler.pypatterns   s   
r   c                   C   s   ddddddgS )NFOOBARzfoo barr   BARBAZzbar bazr   r   r   r   r   overlapping_patterns   s   r"   c                   C   s   dddddddddgS )NPERSONDinar   ORGACMEACMr   r   r   r   r   person_org_patterns!   s   r(   c                 C   s   | dddg S )NDATE	June 14thr   r   )r(   r   r   r   person_org_date_patterns*   s   r+   c                 C   s   t d}|jdddid}||  tdd |jj D }|dks&J |g  td	d |jj D }||ks>J d
S )z/Test that patterns don't get added excessively.xx
span_rulervalidateTconfigc                 s       | ]}t |V  qd S Nlen.0mmr   r   r   	<genexpr>4       z,test_span_ruler_add_empty.<locals>.<genexpr>r   c                 s   r1   r2   r3   r5   r   r   r   r8   7   r9   N)spacyblankadd_pipeadd_patternssummatcher	_patternsvalues)r   nlprulerpattern_countafter_countr   r   r   test_span_ruler_add_empty/   s   


rF   c                 C   s   t d}|d}||  t|t| ksJ t|jdks"J d|v s(J d|v s.J |d}t|jd dks=J |jd d	 jdksIJ |jd d	 jd
ksUJ |jd d jdksaJ |jd d jdksmJ d S )Nr,   r-      r
   r   hello world bye byerC      r   r       )	r:   r;   r<   r=   r4   labelsspanslabel_id_r   rB   rC   docr   r   r   test_span_ruler_init;   s   


rR   c                  C   s   t d} | d}t|dksJ t|jdksJ | jdgks#J tt | d}W d    n1 s7w   Y  t|j	d dksGJ d S )Nr,   r-   r   rH   rC   )
r:   r;   r<   r4   rL   
pipe_namespytestwarnsUserWarningrM   )rB   rC   rQ   r   r   r   !test_span_ruler_no_patterns_warnsK   s   


rW   c                 C   s  t d}|d}t|jdksJ |jdd | d t|jdks%J |d}|jd	 d jd
ks5J |jd	 d jdksAJ |d dddii|j	d d d< |d}t|jdksaJ |  t|jdksnJ |d}|jd	 d jd
ks~J |jd	 d jdksJ d S )Nr,   r-   r   c                   S      g S r2   r   r   r   r   r   <lambda>[       z/test_span_ruler_init_patterns.<locals>.<lambda>)r   rG   rH   rC   r
   rJ   r   r   @miscr	   
initialize
components)
r:   r;   r<   r4   rL   r\   rM   rN   remove_piper0   rP   r   r   r   test_span_ruler_init_patternsV   s$   



r_   c                 C   sT   t d}|d}||  t|jdksJ |dd  t|jdks(J dS )z)Test that initialization clears patterns.r,   r-   rG   c                   S   rX   r2   r   r   r   r   r   rY   t   rZ   z,test_span_ruler_init_clear.<locals>.<lambda>r   N)r:   r;   r<   r=   r4   rL   r\   r   rB   rC   r   r   r   test_span_ruler_init_clearn   s   


ra   c                 C   s   t d}|d}||  t|jdksJ |d}t|jd dks'J |  t|jdks4J t	t
 |d}W d    n1 sHw   Y  t|jd dksXJ d S )Nr,   r-   rG   r   rC   rJ   r   )r:   r;   r<   r=   r4   rL   rM   clearrT   rU   rV   rP   r   r   r   test_span_ruler_clearx   s   



rc   c                 C   s   t d}|jdddid}||  |d}|dd g|jd	< ||}t|jd	 d
ks1J |jd	 d |dd ks@J |jd	 d jdksLJ |jd	 d jdksXJ |jd	 d jdksdJ |jd	 d jdkspJ d S )Nr,   r-   	overwriteFr/   OH HELLO WORLD bye byer   rI   rC      rJ   r
   r   r   rK   )	r:   r;   r<   r=   make_docrM   r4   rN   rO   rP   r   r   r   test_span_ruler_existing   s   


rh   c                 C   s   t d}|jdddid}||  |d}|dd g|jd	< ||}t|jd	 dks1J |jd	 d jd
ks=J |jd	 d jd
ksIJ |jd	 d jdksUJ d S )Nr,   r-   rd   Tr/   re   r   rI   rC   r
   rJ   r   )	r:   r;   r<   r=   rg   rM   r4   rN   textrP   r   r   r   "test_span_ruler_existing_overwrite   s   


rj   c                 C   s   t d}|d}||  t|t| ksJ t|jdks"J | }t d}|d}t|dks8J t|jdksAJ ||}t|t| ksPJ t|jdksYJ t|jt|jkseJ |jD ]	}||jv sqJ qht	|jt	|jks~J d S )Nr,   r-   rG   r   )
r:   r;   r<   r=   r4   rL   to_bytes
from_bytesr   sorted)r   rB   rC   ruler_bytesnew_nlp	new_rulerr   r   r   r   test_span_ruler_serialize_bytes   s"   






rq   c                  C   s   t d} | d}| jddddid}dddigd	}dd
digd	}tt ||g W d    n1 s:w   Y  ||g tt ||g W d    d S 1 s\w   Y  d S )Nr,   r-   validated_span_rulerr.   T)namer0   r
   r   r   ASDF)r:   r;   r<   rT   raises
ValueErrorr=   r   )rB   rC   validated_rulervalid_patterninvalid_patternr   r   r   test_span_ruler_validate   s   


"rz   c                 C   sN   t d}|jdddid}||  t|jttdd | D ks%J d S )Nr,   r-   rd   Tr/   c                 S   s   g | ]}|d  qS r   r   )r6   pr   r   r   
<listcomp>   s    z.test_span_ruler_properties.<locals>.<listcomp>)r:   r;   r<   r=   rm   rL   setr`   r   r   r   test_span_ruler_properties   s   

(r   c                 C   sv   t d}|d}||  ||d}t|jd dks!J |jd d jdks-J |jd d jd	ks9J d S )
Nr,   r-   foo bar bazrC   rI   r   r    rJ   r!   r:   r;   r<   r=   rg   r4   rM   rN   r"   rB   rC   rQ   r   r   r   !test_span_ruler_overlapping_spans   s   


r   c                 C   s   t d}|d}||  d}|||}t|jd dks#J |jd d jdks/J |jd d jd	ks;J ||}t|dddd
g|jd< |	t
||g}|d dks]J |d dkseJ d S )Nr,   r-   r   rC   rI   r   r    rJ   r!   r{   spans_ruler_pg      ?spans_ruler_rg      ?)r:   r;   r<   r=   rg   r4   rM   rN   r   evaluater   )r"   rB   rC   ri   pred_docref_docscoresr   r   r   test_span_ruler_scorer   s   



r   	n_processrJ   rI   c                 C   s|   t tts	| dk r:dg}dddg}td}|d}|| |j|ddD ]}|jd	 D ]	}|j	dks8J q/q(d S d S )
NrI   zI enjoy eating Pizza Hut pizza.FASTFOODz	Pizza Hutr   r,   r-   )r   rC   )

isinstancer   r   r:   r;   r<   r=   piperM   rN   )r   textsr   rB   rC   rQ   entr   r   r   test_span_ruler_multiprocessing   s   


	r   c              	   C   s   t d}|d}||  t ;}||d  ||d  tt	 ||d  W d    n1 s8w   Y  W d    d S W d    d S 1 sPw   Y  d S )Nr,   r-   
test_rulernon_existing_dir)
r:   r;   r<   r=   r   to_disk	from_diskrT   ru   rv   )r   rB   rC   dr   r   r   test_span_ruler_serialize_dir   s   


"r   c                 C   s   t d}|d}||  ||d}t|jdksJ t|jd dks*J |jd d jdks6J |jd d j	d	ksBJ |
d ||d}t|jd dksYJ t|jd
ksbJ d S )Nr,   r-   Dina went to schoolrf   rC   rJ   r   r#   r$   rI   r:   r;   r<   r=   rg   r4   r   rM   rN   ri   remover(   rB   rC   rQ   r   r   r   test_span_ruler_remove_basic	  s   



r   c                 C   s   t d}|d}||  t|jdksJ tt |	d W d    n1 s-w   Y  tt |
d W d    d S 1 sHw   Y  d S )Nr,   r-   rf   NE)r:   r;   r<   r=   r4   r   rT   ru   rv   r   remove_by_id)r(   rB   rC   r   r   r   *test_span_ruler_remove_nonexisting_pattern  s   


"r   c                 C   s  t d}|d}||  ||d}t|jdksJ t|jd dks*J |jd d jdks6J |jd d j	d	ksBJ |jd d
 jdksNJ |jd d
 j	dksZJ |
d ||d}t|jdksoJ t|jd d
kszJ |jd d jdksJ |jd d j	dksJ |
d tt$ ||d}t|jdksJ t|jd dksJ W d    d S 1 sw   Y  d S )Nr,   r-   zDina founded the company ACME.rf   rC   rI   r   r#   r$   rJ   r%   r&   zDina founded the company ACME)r:   r;   r<   r=   rg   r4   r   rM   rN   ri   r   rT   rU   rV   r   r   r   r   'test_span_ruler_remove_several_patterns#  s,   




"r   c                 C   s  t d}|d}||  ||d}t|jd dks!J |jd d jdks-J |jd d jdks9J |jd d	 jd
ksEJ |jd d	 jdksQJ |jd d jdks]J |jd d jdksiJ |	d
 |	d ||d}t|jd d	ksJ d S )Nr,   r-   *Dina founded the company ACME on June 14thrC   rf   r   r#   r$   rJ   r%   r&   rI   r)   r*   r   )
r:   r;   r<   r=   rg   r4   rM   rN   ri   r   r+   rB   rC   rQ   r   r   r   (test_span_ruler_remove_patterns_in_a_row;  s   




r   c                 C   s   t d}|d}||  t|jdksJ |d t|jdks&J |d t|jdks4J |d t|jd	ksBJ tt	 ||
d
}t|jd d	ksZJ W d    d S 1 sew   Y  d S )Nr,   r-   rG   r#   rf   r%   rJ   r)   r   r   rC   )r:   r;   r<   r=   r4   r   r   rT   rU   rV   rg   rM   r   r   r   r   #test_span_ruler_remove_all_patternsM  s   





"r   c                  C   s  t d} | d}dddg}|| || d}t|jdks%J t|jd dks0J |jd d	 jdks<J |jd d	 j	dksHJ d
ddg}|| || d}t|jdkscJ t|jd dksnJ |jd d	 jdkszJ |jd d	 j	dksJ |jd d jd
ksJ |jd d j	dksJ |
d || d}t|jdksJ t|jd dksJ |jd d	 jd
ksJ |jd d	 j	dksJ || || d}t|jdksJ t|jd dksJ dddg}|| || d}t|jdksJ t|jd dksJ |
d || d}t|jdks4J t|jd dks@J d S )Nr,   r-   DATE1z	last timer   z=I saw him last time we met, this time he brought some flowersrJ   rC   r   DATE2z	this timerI   DATE3zanother timez[I saw him last time we met, this time he brought some flowers, another time some chocolate.rf   r   )rB   rC   	patterns1rQ   	patterns2	patterns3r   r   r   test_span_ruler_remove_and_add]  sj   







r   c                 C   sj   t d}|jddddiid}||  ||d}t|jd d	ks'J |jd d
 jdks3J d S )Nr,   r-   spans_filterr[   z#spacy.first_longest_spans_filter.v1r/   r   rC   rJ   r   r    r   r   r   r   r   test_span_ruler_spans_filter  s   


r   c                 C   s^   t d}|jdddid}||  ||d}t|jdks#J |jd jd	ks-J d S )
Nr,   r-   annotate_entsTr/   r   rJ   r   r    )r:   r;   r<   r=   rg   r4   entsrN   r   r   r   r   #test_span_ruler_ents_default_filter  s   

r   c                 C   s   t d}|jdddddidd}||  |d	}t|d
dddt|ddddg|_||}t|jdks:J |jd jdksDJ |jd
 jdksNJ d S )Nr,   r-   TFr[   z#spacy.prioritize_new_ents_filter.v1)r   rd   ents_filterr/   zfoo bar baz a b crJ   rf   r!   r{      ABCrI   r   r    )	r:   r;   r<   r=   rg   r   r   r4   rN   r   r   r   r   %test_span_ruler_ents_overwrite_filter  s   


"r   c                 C   s|   t ddd }td}|jddddidd	}||  tt ||	d
 W d    d S 1 s7w   Y  d S )Ntest_pass_through_filterc                  S   s   dd } | S )Nc                 S   s   | | S r2   r   )spans1spans2r   r   r   pass_through_filter  s   z^test_span_ruler_ents_bad_filter.<locals>.make_pass_through_filter.<locals>.pass_through_filterr   )r   r   r   r   make_pass_through_filter  s   zAtest_span_ruler_ents_bad_filter.<locals>.make_pass_through_filterr,   r-   Tr[   )r   r   r/   r   )
r   miscr:   r;   r<   r=   rT   ru   rv   rg   )r"   r   rB   rC   r   r   r   test_span_ruler_ents_bad_filter  s   


"r   )/rT   	thinc.apir   r   r:   r   spacy.errorsr   spacy.tests.utilr   spacy.tokensr   spacy.trainingr   fixturer   r   r"   r(   r+   rF   rR   rW   r_   ra   rc   rh   rj   rq   rz   r   r   r   markparametrizer   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   sV    





8	