o
    i                     @   s   d dl mZ d dlmZmZmZmZ d dlZd dlm	Z	 d dl
mZ d dlmZmZ d dlmZ d dlmZmZmZmZ d d	lmZ d
dlmZ dd Zdd Zdd Zdd ZdS )    )Path)AnyCallableDictIterableN)zeros)Config)Errorsutil)InMemoryLookupKB)SimpleFrozenListensure_pathload_model_from_configregistry)Vocab   )make_tempdirc                 C   s   t | }t| t ,}t|}| s|  |d }|t| t| dd}|	t| W d    n1 s:w   Y  t| d S )Nkb   vocabentity_vector_length)
_get_dummy_kb	_check_kbr   r   existsmkdirto_diskstrr   	from_disk)en_vocabkb1ddir_path	file_pathkb2 r%   [/home/ubuntu/.local/lib/python3.10/site-packages/spacy/tests/serialize/test_serialize_kb.pytest_serialize_kb_disk   s   
r'   c                 C   s   t | dd}|jddg dd |jddg d	d |jd
dg dd |jddg dd |jddd
gddgd |jdg dg dd |jdd
gdgd |S )Nr   )r   Q53!   )r      r   )entityfreqentity_vectorQ17r         r   Q007r0   r   r   r0   Q44V  )   r6   r6   double07皙?g?)aliasentitiesprobabilitiesguy)r(   r2   r.   r4   )333333?r=   g?r8   randomg      ?)r   
add_entity	add_aliasr   r   r%   r%   r&   r   #   s   r   c                 C   s  |   dksJ dD ]
}||  v sJ q
dD ]
}||  vs!J q|  dks*J dD ]
}||  v s6J q,dD ]
}||  vsCJ q9t| ddd	 d
}t|dksWJ |d jdks`J d|d j  k rndk sqJ  J |d j	g dks|J |d j
dksJ d|d j  k rdk sJ  J |d jdksJ d|d j  k rdk sJ  J |d j	g dksJ |d j
dksJ d|d j  k rdk sJ  J d S )Nr6   )r(   r.   r2   r4   ) Q0r   )r7   r<   r>   )nothingnessrB   randomnoiser7   c                 S   s   | j S N)entity_)xr%   r%   r&   <lambda>E   s    z_check_kb.<locals>.<lambda>)keyr   r   r2   g"@g
ףp=
@r3   gS㥛?gE?r1   r.   gףp=
?gGz @r/   gMbX?gB`"۹?)get_size_entitiesget_entity_stringsget_size_aliasesget_alias_stringssortedget_alias_candidateslenrG   entity_freqr-   alias_
prior_prob)r   entity_stringalias_string
candidatesr%   r%   r&   r   5   s,   """&r   c            	   	      sB  d} G dd dt  tddtttg f f fdd}tdd	td
tdttg f f fdd}t | }t|dd}|	  |
d}t|j ksSJ |jjdks[J |jjdkscJ t 1}|| t|}|
d}t|j ksJ |jjdksJ |jjdksJ W d   dS 1 sw   Y  dS )z>Check that IO of a custom KB works fine as part of an EL pipe.a  
    [nlp]
    lang = "en"
    pipeline = ["entity_linker"]

    [components]

    [components.entity_linker]
    factory = "entity_linker"
    
    [components.entity_linker.generate_empty_kb]
    @misc = "kb_test.CustomEmptyKB.v1"
    
    [initialize]

    [initialize.components]

    [initialize.components.entity_linker]

    [initialize.components.entity_linker.kb_loader]
    @misc = "kb_test.CustomKB.v1"
    entity_vector_length = 342
    custom_field = 666
    c                       sL   e Zd Z fddZe fdee fddZe fdee fddZ  Z	S )z9test_serialize_subclassed_kb.<locals>.SubInMemoryLookupKBc                    s   t  || || _d S rF   )super__init__custom_field)selfr   r   rZ   	__class__r%   r&   rY   r   s   
zBtest_serialize_subclassed_kb.<locals>.SubInMemoryLookupKB.__init__excludec                    s   t |}| s|jdd | sttjj|ddtddf fdd fd	d
 fdd
fdd
d}t	
||| dS )z[We overwrite InMemoryLookupKB.to_disk() to ensure that self.custom_field is stored as well.T)parentslocr#   returnNc                    s   t | d ji d S NrZ   )srsly
write_jsonrZ   r#   r[   r%   r&   serialize_custom_fields~   s   zbtest_serialize_subclassed_kb.<locals>.SubInMemoryLookupKB.to_disk.<locals>.serialize_custom_fieldsc                    
     | S rF   )write_contentsprg   r%   r&   rI         
 zStest_serialize_subclassed_kb.<locals>.SubInMemoryLookupKB.to_disk.<locals>.<lambda>c                        j j| S rF   )r   stringsr   rk   rg   r%   r&   rI          c                        | S rF   r%   rk   )rh   r%   r&   rI          contentszstrings.jsoncustom_fields)r   r   r   is_dir
ValueErrorr	   E928formatr   r
   r   )r[   pathr^   	serializer%   )r[   rh   r&   r   v   s   


zAtest_serialize_subclassed_kb.<locals>.SubInMemoryLookupKB.to_diskc                    s   t |}| sttjj|d| sttjj|ddtddffdd fddfd	d fd
dd}t	
||| dS )z]We overwrite InMemoryLookupKB.from_disk() to ensure that self.custom_field is loaded as well.r`   r#   rb   Nc                    s   t | d  _d S rc   )rd   	read_jsonrZ   rf   rg   r%   r&   deserialize_custom_fields   s   zftest_serialize_subclassed_kb.<locals>.SubInMemoryLookupKB.from_disk.<locals>.deserialize_custom_fieldsc                    ri   rF   )read_contentsrk   rg   r%   r&   rI      rm   zUtest_serialize_subclassed_kb.<locals>.SubInMemoryLookupKB.from_disk.<locals>.<lambda>c                    rn   rF   )r   ro   r   rk   rg   r%   r&   rI      rp   c                    rq   rF   r%   rk   )r}   r%   r&   rI      rr   rs   )r   r   rw   r	   E929ry   rv   rx   r   r
   r   )r[   rz   r^   deserializer%   )r}   r[   r&   r      s   


zCtest_serialize_subclassed_kb.<locals>.SubInMemoryLookupKB.from_disk)
__name__
__module____qualname__rY   r   r   r   r   r   __classcell__r%   r%   r\   r&   SubInMemoryLookupKBq   s     r   zkb_test.CustomEmptyKB.v1rb   c                     s   dt dtf fdd} | S )Nr   r   c                    s    | |ddS )Nr   r   r   rZ   r%   r   r   r%   r&   empty_kb_factory   s
   zOtest_serialize_subclassed_kb.<locals>.empty_custom_kb.<locals>.empty_kb_factory)r   int)r   r   r%   r&   empty_custom_kb   s   z5test_serialize_subclassed_kb.<locals>.empty_custom_kbzkb_test.CustomKB.v1r   rZ   c                    s    fdd}|S )Nc                    s$    | d}| ddt |S )Nr   random_entityg        )r?   r   rA   )r   rZ   r   r%   r&   custom_kb_factory   s   zJtest_serialize_subclassed_kb.<locals>.custom_kb.<locals>.custom_kb_factoryr%   )r   rZ   r   r   )rZ   r   r&   	custom_kb   s   	z/test_serialize_subclassed_kb.<locals>.custom_kbT)	auto_fillentity_linkerr5   i  N)r   r   miscr   r   r   r   from_strr   
initializeget_pipetyper   r   rZ   r   r   r
   load_model_from_path)	config_stringr   r   confignlpr   tmp_dirnlp2entity_linker2r%   r   r&   test_serialize_subclassed_kbU   s6   ) 




"r   )pathlibr   typingr   r   r   r   rd   numpyr   	thinc.apir   spacyr	   r
   spacy.kb.kb_in_memoryr   
spacy.utilr   r   r   r   spacy.vocabr   r   r'   r   r   r   r%   r%   r%   r&   <module>   s     