o
    i                  	   @   s*  d dl mZmZmZmZmZ d dlZd dlmZ d dl	m
Z
mZmZ d dlmZ d dlmZ d dlmZmZmZmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dl m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z'm(Z( d dl)m*Z* d dl+m,Z, d dl-m.Z. ej/dd Z0dd Z1ej23ddd Z4ej23ddd Z5ej23ddd  Z6ej23dej27d!d"d#gd!e8fd$d%Z9d&d' Z:d(d) Z;d*d+ Z<d,d- Z=d.d/ Z>d0d1 Z?d2d3 Z@d4d5 ZAd6d7 ZBd8d9 ZCd:d; ZDej23d<d=d> ZEd?d@ ZFdAdB ZGdCdD ZHdEdF ZIdGdH ZJdIdJ ZKdKdL ZLej2MdMdNdO ZNej2MdMdPdQ ZOdRdS ZPdTdU ZQdVdWdXdYdZid[gg d\d]fd^dWdYdXdZid[d_gg d`d]fdadWdYdXdZid[gg d`d]fdbdWdXdYdZid[dcgg d\d]fddi g g ded]fgZRg dfZSdgdh ZTdidj ZUdkdl ZVej2jWdmdndodp ZXej2jWdmdndqdr ZYdsdt ZZdudv Z[dwdx Z\ej27dydzd{e!d|fdzd}e!d|fgd~d Z]ej27ddddgdddgddddddggdd Z^ej23ddd Z_dd Z`ej27dd#d}e!d|fd"d}e!d|fgde8deeaef fddZbdd ZcdS )    )AnyCallableDictIterableTupleN)assert_equal)Languageregistryutil)	ENT_KB_ID)pickle)	CandidateInMemoryLookupKBKnowledgeBaseget_candidatesEnglish)load_kb)build_span_maker)EntityLinker)EntityLinker_v1)DEFAULT_TOK2VEC_MODEL)Scorer)make_tempdir)DocSpan)Example)ensure_path)Vocabc                   C   s   t  S Nr    r    r    [/home/ubuntu/.local/lib/python3.10/site-packages/spacy/tests/pipeline/test_entity_linker.pynlp   s   r"   c                 C   s,   d}| | |  kr| | ksJ  J d S )Ng-C6?r    )abdeltar    r    r!   assert_almost_equal   s   (r&   iB  c                  C   s   t  } t| jdd}g d}g d}tt |jddgddg||gd W d	   n1 s0w   Y  | d
ks=J t -}t	|}|
 sM|  |d }|t| t| jdd}|t| W d	   n1 spw   Y  | d
ks}J d	S )zKTest that setting entities with overlapping identifiers does not mess up IO   entity_vector_length)?g?g)\(?)g?g      @gGz @Q1    o   )entity_list	freq_listvector_listN   kb)r   r   vocabpytestwarnsUserWarningset_entitiesget_size_entitiesr   r   existsmkdirto_diskstr	from_disk)r"   r2   vector1vector2ddir_path	file_pathkb2r    r    r!   test_issue4674"   s,   rD   iJ  c                 C   s
  ddl m} || dd}|jddg dd tt |jd	dgd
gd W d   n1 s0w   Y  |d	du s>J |jddgdgd |jddgdgd t }|	| |
| W d   n1 sjw   Y  | dkswJ t| ddhksJ dS )zNEnsure that the KB does not accept empty strings, but otherwise IO works fine.r   )r   r'   r(   1   r1      r'   entityfreqentity_vector 皙?aliasentitiesprobabilitiesNFx皙?y皙?rH   )spacy.kb.kb_in_memoryr   
add_entityr4   raises
ValueError	add_aliascontains_aliasr   r;   r=   get_size_aliasessetget_alias_strings)en_vocabr   r2   tmp_dirr    r    r!   test_issue6730<   s   
rb   i  c                  C   s   d} t  }|d |d}dddiddiddidd	igd
g}|| || }dd |jD }t|dks:J |d }|jd }|j|j  k rQ|jk sTJ  J ||j	dks^J d S )NzfKathleen Battle sang in Mahler 's Symphony No. 8 at the Cincinnati Symphony Orchestra 's May Festival.sentencizerentity_rulerTHINGLOWERsymphonyno.8labelpatternc                 S   s   g | ]}|qS r    r    ).0sr    r    r!   
<listcomp>f   s    z"test_issue7065.<locals>.<listcomp>rH   r   )
r   add_pipeadd_patternssentslenentsstartendindexsent)textr"   rulerpatternsdoc	sentencessent0entr    r    r!   test_issue7065R   s(   



"r   entity_in_first_sentenceTFc                    s   t  }dd}dg}ddddi}| r|d ddd|d	< g d
}||}t||||d}|g fdd}|jddd}	|	| |j fddd}
tdD ]	}|j |
d qT|		|j
g dS )a  Tests if NEL crashes if entities cross sentence boundaries and the first associated sentence doesn't have an
    entity.
    entity_in_prior_sentence (bool): Whether to include an entity in the first sentence associated with the
    sentence-crossing entity.
    r'   z'Mahler 's Symphony No. 8 was beautiful.)
      WORK)r   r                 ?)Q7304Q270853r      PERSONr   r   )	r1   r   r   r   r1   r   r   r   rQ   linkssent_startsc                    s`   t |  d}|jddg dd |jddgdgd |jd	dg d
d |jdd	gdgd |S )Nr(   r      	   r1   irI   zNo. 8r   rO   r   r   r'   Mahlerr   rX   r[   r3   mykbvector_lengthr    r!   	create_kb   s   z.test_sentence_crossing_ents.<locals>.create_kbentity_linkerTlastc                          S r   r    r    train_examplesr    r!   <lambda>       z-test_sentence_crossing_ents.<locals>.<lambda>get_examplesrH   )sgdN)r   appendr   	from_dictrq   set_kb
initializerangeupdatepredict	reference)r   r"   rz   rQ   r   r   r}   exampler   r   	optimizerir    r   r   r!   test_sentence_crossing_entsn   s*   	

r   c            
         s   ddg difg} t  }dg  | D ]\}}||} t|| qfdd}|jddd	}|| |j fd
dd}tdD ]}i }	|j ||	d qD|jddd |	  d S )NzThe sky is blue.r   )r1   r   r   r   r   r'   c                    6   t |  d}|jddg dd |ddgdg |S Nr(   Q2146908r   r   rI   Russ Cochranr*   r   r   r   r    r!   r         z#test_no_entities.<locals>.create_kbr   Tr   c                      r   r   r    r    r   r    r!   r      r   z"test_no_entities.<locals>.<lambda>r   rH   r   lossesrc   first)
r   r   r   r   rq   r   r   r   r   evaluate)

TRAIN_DATAr"   rz   
annotationr}   r   r   r   r   r   r    r   r!   test_no_entities   s(   
r   c                     sP  ddddiidgg ddfg} t  }dg  | D ]\}}||} t|| qfd	d
}|jddd}|| |j fddd}tdD ]}i }	|j ||	d qK|jddd dddiddigddddiddigdg}
|jddd}|	|
 |
 }d|d v sJ d|d  v sJ d|d v sJ d|d  vsJ d S )!N,Russ Cochran his reprints include EC Comics.r   r   r   r   r   r   r   r1   r   r   r   r   r   r   r   r   rQ   r   r'   c                    r   r   r   r   r   r    r!   r      r   z%test_partial_links.<locals>.create_kbr   Tr   c                      r   r   r    r    r   r    r!   r      r   z$test_partial_links.<locals>.<lambda>r   rH   r   rc   r   r   rf   russcochranrk   ORGeccomicsrd   beforeents_per_typenel_f_per_type)r   r   r   r   rq   r   r   r   r   rr   r   )r   r"   rz   r   r}   r   r   r   r   r   r|   r{   resultsr    r   r!   test_partial_links   s>   




r   c                 C   s,  t | jdd}|jddg dd |jddg d	d |jd
dg dd |jddd
gddgd |jddgdgd | dksCJ | dksKJ |dg dksVJ |dg d	ksaJ |d
g dkslJ t|jdddd t|jd
ddd t|jdddd t|jd
ddd dS )zCTest the valid construction of a KB with 3 entities and two aliasesr'   r(   r+      )      r'   rI   Q2   )rH   r1   r   Q3   )r   ir   douglas皙?rT   rO   adamr*   rH   rJ   rP   Q342r   douglassssssN)	r   r3   rX   r[   r8   r]   
get_vectorr&   get_prior_probr"   r   r    r    r!   test_kb_valid_entities   s   r   c                 C   s   t | jdd}|jdddgd |jdddgd |jd	d
dgd tt |jdddgddgd W d   dS 1 s?w   Y  dS )zSTest the invalid construction of a KB with an alias linked to a non-existing entityr1   r(   r+   r   rI   r   r   rH   r   r   r'   r   r   r   rT   rO   Nr   r3   rX   r4   rY   rZ   r[   r   r    r    r!   test_kb_invalid_entities     "r   c                 C   s   t | jdd}|jdddgd |jdddgd |jd	d
dgd tt |jddd	gddgd W d   dS 1 s?w   Y  dS )zDTest the invalid construction of a KB with wrong prior probabilitiesr1   r(   r+   r   rI   r   r   rH   r   r   r'   r   r   rN   rO   Nr   r   r    r    r!   test_kb_invalid_probabilities*  s   "r   c                 C   s   t | jdd}|jdddgd |jdddgd |jd	d
dgd tt |jddd	gg dd W d   dS 1 s?w   Y  dS )zTTest the invalid construction of a KB with non-matching entity and probability listsr1   r(   r+   r   rI   r   r   rH   r   r   r'   r   )333333?rN   rV   rO   Nr   r   r    r    r!   test_kb_invalid_combination8  r   r   c                 C   sd   t | jdd}|jddg dd tt |jddd	gd W d
   d
S 1 s+w   Y  d
S )zMTest the invalid construction of a KB with non-matching entity vector lengthsr'   r(   r+   r   rG   rI   r   r   rH   N)r   r3   rX   r4   rY   rZ   r   r    r    r!   test_kb_invalid_entity_vectorH  s
   "r   c                 C   s   | j di d}t|jdksJ tjtdd |  W d   n1 s&w   Y  |j dks4J |j dks=J |jj	dksEJ dS )z<Test that the default (empty) KB is loaded upon constructionr   configr   E139)matchN@   )
rq   rt   r2   r4   rY   rZ   validate_kbr8   r]   r)   r"   r   r    r    r!   test_kb_defaultT  s   
r   c                 C   s\   | j dddid}t|jdksJ |j dksJ |j dks$J |jjdks,J dS )zNTest that the default (empty) KB can be configured with a custom entity lengthr   r)   #   r   r   N)rq   rt   r2   r8   r]   r)   r   r    r    r!   test_kb_custom_lengtha  s
   r   c                 C   sH   |  d}tt |dd  W d   dS 1 sw   Y  dS )z2Test that the EL can't initialize without examplesr   c                   S   s   g S r   r    r    r    r    r!   r   n  r   z*test_kb_initialize_empty.<locals>.<lambda>N)rq   r4   rY   	TypeErrorr   r   r    r    r!   test_kb_initialize_emptyj  s   
"r   c              	   C   s   t | jdd}t V}||d  ||d  ||d d  ||d d  ||d  tt ||d d  W d   n1 sKw   Y  W d   dS W d   dS 1 scw   Y  dS )zTest serialization of the KBr1   r(   r2   newunknownN)r   r3   r   r;   r=   r4   rY   rZ   )r"   r   r@   r    r    r!   test_kb_serializeq  s   "r   i#  c                 C   s   g d}t | jdd}|dgdg|g |d|ksJ t '}||d  t | jdd}||d  |d|ks@J W d    d S 1 sKw   Y  d S )Nr   r      r   r   r3   r)   E1r1   r2   )r   r3   r7   r   r   r;   r=   )r"   vkb1r@   rC   r    r    r!   test_kb_serialize_2  s   "r   c                 C   s4  g d}g d}g d}t | jdd}|dgdg|g | dgks&J |dd	gdd
g||g t| dd	hks?J |d|ksHJ |d	|ksQJ t <}||d  t | jdd}||d  t| dd	hksvJ |d|ksJ |d	|ksJ W d   dS 1 sw   Y  dS )zGTest that set_entities entirely overwrites the previous set of entitiesr   )r1   r1   r1   r   )rH   rH   rH   r'   r   r   E0r1   r   E2r   r2   N)	r   r3   r7   get_entity_stringsr^   r   r   r;   r=   )r"   r   v1v2r   r@   rC   r    r    r!   test_kb_set_entities  s$   "r  c                 C   s   d}|| j jvs
J t| j dd}||rJ |j|ddgd ||s(J ||j jv s0J t &}||d  tt dd}||d  ||j jv sQJ W d   dS 1 s\w   Y  dS )	z/Test serialization of the KB and custom strings	MyFunnyIDr1   r(   iV  r'   )rK   rL   r2   N)	r3   stringsr   contains_entityrX   r   r;   r   r=   )r"   rJ   r   r@   mykb_newr    r    r!   test_kb_serialize_vocab  s   "r  c                 C   s`  t | jdd}| d}|dd }|dd }|dd }|dd }|jdd	dgd
 |jdddgd
 |jdddgd
 |jdddgddgd |jddgdgd tt||dks_J tt||dksjJ tt||dksuJ tt||dksJ t||d jdksJ t||d jdksJ tt||d j	d tt||d j
d dS )z!Test correct candidate generationr1   r(   zdouglas adam Adam shrubberyr   rH   r'   r   r+      rI   r   r   r   r   r   r   rV   rO   r   r*   N)r   r3   rX   r[   rt   r   entity_alias_r&   entity_freq
prior_prob)r"   r   r}   douglas_entadam_entAdam_entshrubbery_entr    r    r!   test_candidate_generation  s&   r  c           	         sv  |  d dddigd}|  d}||g dd }| j d	d
did}|| d}| |}|d jdks9J |d jdksBJ |d jdksKJ dd   fddtddttdgtt	 f f fdd}tddtttd gttt	  f ffdd}| j
d	d	dd did did!d}|| | |}|d jdksJ |d jdksJ |d jdksJ d"S )#z8Test correct candidate generation as part of the EL piperc   r   rf   r   rk   rd   c                 S   sL   t | dd}|jdddgd |jddd	gd |jd
ddgddgd |S )Nr1   r(   r   r   rH   rI   r   r   r'   r   r   rV   rO   r   r3   r2   r    r    r!   r     s
   z-test_el_pipe_configuration.<locals>.create_kbr   incl_contextFr   z%Douglas and douglas are not the same.r   NILr1   rM   rH   r   c                 S   s   |  |j S r   )get_alias_candidatesrz   lower)r2   spanr    r    r!   get_lowercased_candidates  s   z=test_el_pipe_configuration.<locals>.get_lowercased_candidatesc                    s    fdd|D S )Nc                    s   g | ]} |qS r    r    )rn   r  )r  r2   r    r!   rp     s    zWtest_el_pipe_configuration.<locals>.get_lowercased_candidates_batch.<locals>.<listcomp>r    )r2   spansr  )r2   r!   get_lowercased_candidates_batch  s   zCtest_el_pipe_configuration.<locals>.get_lowercased_candidates_batchz$spacy.LowercaseCandidateGenerator.v1returnr   c                      r   r   r    r    r  r    r!   create_candidates     z5test_el_pipe_configuration.<locals>.create_candidatesz)spacy.LowercaseCandidateBatchGenerator.v1c                      r   r   r    r    )r  r    r!   create_candidates_batch  r"  z;test_el_pipe_configuration.<locals>.create_candidates_batchz@misc)r  r   get_candidates_batchN)rq   rr   r   
ent_kb_id_r	   miscr   r   r   r   replace_pipe)	r"   rm   r{   r   r   rz   r}   r!  r#  r    )r  r  r!   test_el_pipe_configuration  sH   



r(  c                 C   sB   | j di d}|jdksJ | jddddid}|jdksJ dS )z6Test that n_sents can be set through the configurationr   r   r   n_sentsrH   N)rq   r)  r'  r   r    r    r!   test_nel_nsents  s   
r*  c                 C   s  t | jdd}|jdddgd |jdddgd}|jd	d
dgd |jddd	gddgd |jddgdgd}|d}t|dksEJ |d j|ksNJ |d jdksWJ |d j|ks`J |d j	dksiJ t
 b}||d  t t dd}||d  |d}t|dksJ |d j|ksJ |d jdksJ |d j|ksJ |d j	dksJ |ddgksJ t|ddd W d   dS 1 sw   Y  dS )z7Test that string information is retained across storager1   r(   r+   r  rI   r   r   rH   r   r   r'   r   rN   rV   rO   r   r*   r   r2   N)r   r3   rX   r[   r  rt   rJ   r  rP   r  r   r;   r   r=   r   r&   r   )r"   r   q2_hash	adam_hash
candidatesr@   kb_new_vocabr    r    r!   test_vocab_serialization  s2   

"r/  c                 C   s   t | jdd}|jdddgd |jdddgd |jd	d
dgd |jddd	gddgd |jddgdgd t|ddksCJ |jdddd t|ddksVJ tt	 |jdddd W d   n1 snw   Y  t|ddks~J dS )z5Test that we can append additional alias-entity pairsr1   r(   r+   r  rI   r   r   rH   r   r   r'   r   rN   rV   rO   r   r*   rT   rP   rJ   r  r   N)
r   r3   rX   r[   rt   r  append_aliasr4   r5   r6   r   r    r    r!   test_append_alias?  s   r2  zignore:\[W036c                 C   s   t | jdd}|jdddgd |jdddgd |jd	d
dgd |jddd	gddgd |jddgdgd tt |jdddd W d   dS 1 sQw   Y  dS )zLTest that append an alias will throw an error if prior probs are exceeding 1r1   r(   r+   r  rI   r   r   rH   r   r   r'   r   r   rV   rO   r   r*   rT   r0  N)r   r3   rX   r[   r4   rY   rZ   r1  r   r    r    r!   test_append_invalid_alias]  s   "r3  c                    s   d  fdd}|  d ddddddg}|  d	}|| d
di}| j d|dd}|| |   |jd ks@J d}| |}|jD ]}|j}	|j}
|j	
 }|jD ]}|j|	krg|j|
ksgJ qYqIdS )z9Test that Span.as_doc preserves the existing entity linksr1   c                    s\   t |  d}|jdddgd |jdddgd |jddgd	gd
 |jddgdgd
 |S )Nr(   r+   r   r1   rI   r   r   Bostongffffff?rO   Denverg333333?r   r   r   r    r!   r   u  s   z.test_preserving_links_asdoc.<locals>.create_kbrc   GPEr4  rk   r5  rd   
incl_priorFr   Tr   r   nO(She lives in Boston. He lives in Denver.N)rq   rr   r   r   modelget_dimru   rz   kb_id_ry   as_doc)r"   r   r|   r{   r   r   rz   r}   r   	orig_text
orig_kb_idsent_docs_entr    r   r!   test_preserving_links_asdocp  s2   







rC  c                 C   s   d}| |}t t|jdksJ t|ddddd}|g|_t t|jdks)J t|jd jdks5J t|jd jdksAJ d	S )
+Test that doc.ents preserves KB annotationsr:  r   r'   r   LOCr+   rl   kb_idr1   N)rt   listru   r   label_r=  )r"   rz   r}   
boston_entr    r    r!   test_preserving_links_ents  s   rK  c                 C   s   d}| |}t t|jdksJ |jjd}|jjd}||ddfg|_t t|jdks2J t|jd jdks>J t|jd jdksJJ dS )	rD  r:  r   rE  r+   r'   r   r1   N)rt   rH  ru   r3   r  addrI  r=  )r"   rz   r}   locq1r    r    r!   test_preserving_links_ents_2  s   rO  CRuss Cochran captured his first major title with his son as caddie.r   r   r   )Q7381115r   r   )r1   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )"   +   ARTr   +Russ Cochran has been publishing comic art.@Russ Cochran was a member of University of Kentucky's golf team.)rS  3   rE  zThe weather is nice today.)r1   r   r   r   r   r   )r   rQ  rQ  r   c                     s^  t   dd jjvsJ g tD ]\} } | }t|| qfdd} jddddid}t|t	s:J |
| d|jjv sGJ d|jjjv sPJ  jfd	d
d}|jdksdJ |jd|jjkspJ tdD ]}i } j||d qt|d dk sJ  jddd dddiddigdg} jddd}	|	| g }
tD ]\} } | }|jD ]}|
|j qq|
tksJ t Y} | t|}|j jksJ d|jjv sJ |d}d|jjv sJ d|jjjv sJ g }
tD ]\} }|| }|jD ]	}|
|j q
q|
tksJ W d    n	1 s'w   Y  g d}dd  |D }dd  |D }dd  fdd|D D }t|| t||  }d|v shJ d |v soJ d!|v svJ d"|v s}J d#|v sJ d$|v sJ d%|v sJ d|d% v sJ |d! d&ksJ |d$ d&ksJ d S )'Nr'   r   c                    P   t |  d}|jddg dd |jddg dd |jdddgd	d	gd
 |S Nr(   r   r   r   rI   rQ  r   r         ?rO   r   r   r   r    r!   r        z4test_overfitting_IO_gold_entities.<locals>.create_kbr   Tuse_gold_entsr   r   c                      r   r   r    r    r   r    r!   r     r   z3test_overfitting_IO_gold_entities.<locals>.<lambda>r   r9  2   r   MbP?rc   r   r   rf   r   r   rk   rd   r   )rP  r   rU  rV  c                 S      g | ]}| tgqS r    to_arrayr   rn   r}   r    r    r!   rp   &      z5test_overfitting_IO_gold_entities.<locals>.<listcomp>c                 S   r`  r    ra  rc  r    r    r!   rp   '  rd  c                 S   r`  r    ra  rc  r    r    r!   rp   (  rd  c                    s   g | ]} |qS r    r    )rn   rz   )r"   r    r!   rp   (  s    nel_macro_pnel_macro_rnel_macro_fnel_micro_pnel_micro_rnel_micro_fr   r   )r   r3   r  r   r   r   r   rq   
isinstancer   r   r2   r   r;  r<  r)   r   r   rr   ru   r=  GOLD_entitiesr   r;   r
   load_model_from_path
pipe_namesget_pipepiper   r   )rz   r   r}   r   r   r   r   r   r|   r{   predictionsr   ra   nlp2entity_linker2doc2textsbatch_deps_1batch_deps_2no_batch_depsevalr    )r"   r   r   r!   !test_overfitting_IO_gold_entities  s   










rz  c                     s  t  } d d| jjvsJ g }tD ]\}}| |}|t|| q fdd}| jddd}| jddd	d
id}|| g }tD ]\}}|t| 	|| |
dD ]	}	||	d  qXqC|  }
tdD ]}i }| j||
|d qk|d dk sJ |d dk sJ | jddd d}| |}|j}t|dksJ |d jdksJ |d jdksJ |d jdksJ t D}| | t|}|j| jksJ ||}|j}t|dksJ |d jdksJ |d jdksJ |d jdksJ W d    n	1 sw   Y  | |}d|v sJ d|v sJ d|v s%J d|v s,J d|v s3J d|d v s<J d|d v sEJ |d dksNJ |d dksWJ |d dks`J d S )Nr'   r   c                    rX  rY  r   r   r   r    r!   r   F  r[  z/test_overfitting_IO_with_ner.<locals>.create_kbnerTr   r   r\  Fr]  rQ   rH   r^  r   r_  rc   rP  r1   r   r   r   r  rg  rj  ents_fr   r   )r   r3   r  r   r   r   r   rq   r   make_docget	add_labelr   r   r   ru   rt   rz   rI  r=  r   r;   r
   rm  rn  r   )r"   r   rz   r   r}   r   r{  r   annotationsr   r   r   r   	test_textru   ra   rr  rt  ents2ry  r    r   r!   test_overfitting_IO_with_ner:  sn   




r  c                  C   s  d} t  }|d }t }d|jjvsJ t|j| d}|jddg dd |jddgd	gd
 d|jjv s8J || t }d|jjvsHJ |jjd d|jjv sWJ d|jjvs_J |j	ddd}|
t| d|jjv suJ d|jjv s}J W d    d S 1 sw   Y  d S )Nr'   r2   r   r(   r   r   rI   r   r   rO   
RandomWordr   Tr   )r   r   r3   r  r   rX   r[   r;   rL  rq   r   r   )r   ra   kb_dirnlp1r   rr  r   r    r    r!   test_kb_serialization  s(   
"r  zNeeds fixing)reasonc                  C   s~   t  } t| jdd}|jddg dd |drJ |jddgdgd	 |ds,J t|}t|}|ds=J d S 
Nr'   r(   r   r   r   rI   r   r   rO   )	r   r   r3   rX   r\   r[   r   dumpsloads)r"   kb_1datakb_2r    r    r!   test_kb_pickle  s   

r  c                  C   s   dd } t  }|d |jddd}||  |jddgks!J |jds)J t|}t|}|jddgks<J |	d}|jdsIJ d S )Nc                 S   8   t | dd}|jddg dd |jddgdgd	 |S r  r   r  r    r    r!   r        z"test_nel_pickle.<locals>.create_kbr{  r   Tr   r   )
r   rq   r   rn  r2   r\   r   r  r  ro  )r   nlp_1entity_linker_1r  nlp_2entity_linker_2r    r    r!   test_nel_pickle  s   




r  c                  C   s  t  } t| jdd}|jddg dd |jddg d	d |jd
dgdgd |jddgdgd |jdddgddgd |d
sEJ | }t| jdd}|d
rWJ ||}| | ksfJ |j	|j	ksnJ |
 |
 ksxJ |d|dksJ |d|dksJ |d
sJ | | ksJ | | ksJ t|d
t|d
ksJ t|dt|dksJ d S )Nr'   r(   r   r   r   rI   Q66r   rG   r   r   rO   BoeingrZ  
RandomnessrV   rT   )r   r   r3   rX   r[   r\   to_bytes
from_bytesr8   r)   r  r   r]   r_   rt   r  )r"   r  kb_bytesr  r    r    r!   test_kb_to_bytes  s8   

r  c                  C   s   dd } t  }|d |jddd}||  |jds J |jddgks)J | }t  }|d |jddd |jddgksEJ |djdrPJ ||}|dj}|dsbJ |	dg d	ksmJ t
|jddd
d d S )Nc                 S   r  r  r   r  r    r    r!   r     r  z$test_nel_to_bytes.<locals>.create_kbr{  r   Tr   r   r   r   r   r   )r   rq   r   r2   r\   rn  r  ro  r  r   r&   r   )r   r  r  	nlp_bytesr  r  r    r    r!   test_nel_to_bytes  s(   



r  c            	      C   s  g } t  }|d}t|dddddt|ddd	d
dg|_|d}t|dddddt|ddd	d
dg|_| t|| |d}t|dddddt|ddd	ddg|_|d}t|dddddt|ddd	ddg|_| t|| |d}t|ddd	ddg|_|d}t|ddd	ddg|_| t|| t j| dgd}|d d d dksJ |d d d dksJ |d d	 d dksJ |d d	 d dksJ |d dksJ |d dksJ d S )NzJulia lives in London happily.r   r1   r   r   rF  r'   r   rE  r   Q70zShe loves London.rH   Q13r  zLondon is great.)negative_labelsr   prZ  rr   rh  gUUUUUU?ri  )r   r   ru   r   r   r   score_links)	r   r"   ref1pred1ref2pred2ref3pred3scoresr    r    r!   test_scorer_links  s@   r  zname,configr   spacy.EntityLinker.v1)@architecturestok2veczspacy.EntityLinker.v2c                    s   dt  }g  tD ]\}}||} t|| q	fdd}|j| d|id}|d dkr9t|ts8J nt|t	s@J |
| |j fdd	d
}tdD ]}	i }
|j ||
d qSd S )Nr'   c                    rX  rY  r   r   r   r    r!   r   N  s   z,test_legacy_architectures.<locals>.create_kbr;  r   r  r  c                      r   r   r    r    r   r    r!   r   _  r   z+test_legacy_architectures.<locals>.<lambda>r   rH   r   )r   r   r}  r   r   r   rq   rk  r   r   r   r   r   r   )namer   r"   rz   r   r}   r   r   r   r   r   r    r   r!   test_legacy_architectures<  s"   


r  r|   	CHARACTERKirbyrk   r   KorbyISisCOLORpinkc                    s  ddddiidgg ddfg}t  }dg  |D ]\}}||} t|| q|d	}||   D ]}||j|_q7|d	 fd
d}|jdddidd}	|	| |	j	du saJ |j
 fddd}
tdD ]}i }|j |
|d qo|jddd |  d S )NzKirby is pink)r   r   Q613241r   )r   r   r  )r1   r   r   r   r'   rd   c                    s\   t |  d}|jddg dd |ddgdg |jddg d	d |ddgdg |S )
Nr(   r  r   r   rI   r  r*   r  )r   rH   r   r   r   r    r!   r     s   z$test_no_gold_ents.<locals>.create_kbr   r\  FTr8  c                      r   r   r    r    r   r    r!   r     r   z#test_no_gold_ents.<locals>.<lambda>r   rH   r   rc   r   )r   r   r   r   rq   rr   	predictedremove_piper   r\  r   r   r   r   )r|   r   r"   rz   r   r}   r{   egr   r   r   r   r   r    r   r!   test_no_gold_entsf  s<   






r  ig%  c            	         s   t  } t| jddgddgddgd}t| jg dg d	g d
d}t||}|g dfdd}| jddd}|| | j fddd}tdD ]}i }| j ||d qK| jddd | 	  d S )Nr  123456TFB-CHARACTER
B-CARDINAL)wordsspacesru   )r  123456)TFF)r  r  r  r'   c                    r   )Nr(   r  r   r   rI   r  r*   r   r   r   r    r!   r     r   z-test_tokenization_mismatch.<locals>.create_kbr   r   c                      r   r   r    r    r   r    r!   r     r   z,test_tokenization_mismatch.<locals>.<lambda>r   rH   r   rc   r   )
r   r   r3   r   rq   r   r   r   r   r   )	r"   doc1rt  r  r   r   r   r   r   r    r   r!   test_tokenization_mismatch  s2   

r  c                   C   s:   t t tdd W d   dS 1 sw   Y  dS )z;Test whether instantiation of abstract KB base class fails.Nr'   )r4   rY   r   r   r    r    r    r!   test_abstract_kb_instantiation  s   "r  zmeet_threshold,configmeet_thresholdr   c                    s   t  }|d d}dg}dddii}g d}d ||}t||||dg fd	d
}|jddd|dd}	|	| |jfddd |jddd}
|
dddigdg ||}t|jdksiJ ru|jd j	 ks|J t
jszJ dS dS )zTests abstention threshold.
    meet_threshold (bool): Whether to configure NEL setup so that confidence threshold is met.
    config (Dict[str, Any]): NEL architecture config.
    rc   z&Mahler's Symphony No. 8 was beautiful.r   r   r   r   )	r1   r   r   r   r   r   r   r   r   r   c                    s@   t | dd}|j dg dd |jd grdndgd	 |S )
Nr'   r(   r   r   rI   r   r1   g{Gz?rO   r   r   )	entity_idr  r    r!   r     s   z!test_threshold.<locals>.create_kbr   TgGz?)	thresholdr;  r]  c                      r   r   r    r    r   r    r!   r   	  r   z test_threshold.<locals>.<lambda>r   rd   r   r   rf   mahlerrk   r1   r   N)r   rq   r   r   r   r   rr   rt   ru   r=  r   r  )r  r   r"   rz   rQ   r   r   r}   r   r   r{   r    )r  r  r   r!   test_threshold  s2   

*r  c                  C   sH   t  } | d}|dd }d|_|g|_| d}t }|||gd dS )zCThe forward pass of the span maker may have a doc with no entities.za b cr   r1   Xzx y zFN)r   rI  ru   r   )r"   r  r   rt  
span_makerr    r    r!   "test_span_maker_forward_with_empty  s   r  )dtypingr   r   r   r   r   r4   numpy.testingr   spacyr   r	   r
   spacy.attrsr   spacy.compatr   spacy.kbr   r   r   r   spacy.lang.enr   spacy.mlr   spacy.ml.models.entity_linkerr   spacy.pipeliner   spacy.pipeline.legacyr   spacy.pipeline.tok2vecr   spacy.scorerr   spacy.tests.utilr   spacy.tokensr   r   spacy.trainingr   
spacy.utilr   spacy.vocabr   fixturer"   r&   markissuerD   rb   r   parametrizeboolr   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r(  r*  r/  r2  filterwarningsr3  rC  rK  rO  r   rl  rz  r  r  xfailr  r  r  r  r  r  r  r  r  r<   r  r  r    r    r    r!   <module>   s    







3'3	

 =
$



(jX

!,
"



<
'0