o
    iM                     @   s<  d dl Z z
d dlmZmZ W n ey   d dlmZmZ Y nw d dlmZmZm	Z	 d dl
Z
d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZ d
dlmZ e jddd Zdd Zdd Zdd Z dd Z!dd Z"dd Z#dd Z$dd Z%dd  Z&d!d" Z'd#d$ Z(e j)d%d&d'id(d'id)d'igi d*d*d*d+fd&d'id(d,id)d-igi d.d*d/d+fd0d1d2d)d'id3d3d4gi d5d6d*d7d7d8fd-d9d2d,d,d:gi d;d<d6d6d=fd3d3d2d(d'igi d*d>d2fd3d?d2gi d'd?d2fd3d3d2d(d'igd&d?id?d'd2fd?d?d2d)d?igi d?d?d?d+fd?d?d2d)d'igi d?d?d'd+fd?d?d2d)d?igd)d;id?d?d'd+fd3d3d'd'd=gd?d?d2d?d?d3d3d=fd3d3d'd'd=gd?d?d?d@d?d?d3d3d?dAfgdBdC Z*dDdE Z+dFdG Z,dHdI Z-dJdK Z.dLdM Z/G dNdO dOZ0e j)dPd dQdR dSdR fdTe0e0ddfgdUdV Z1dWdX Z2dS )Y    N)	StrictInt	StrictStr)ConfigValidationErrorLinearModel)German)English)Language)DEFAULT_TOK2VEC_MODEL)Doc)SimpleFrozenDictcombine_score_weightsregistry   )make_tempdiri  c                     s   d} d t | G  fddd}t }|j|  d}|jdks#J t '}|| d dd	iii}tj||d
}|	 jd	ksEJ W d    d S 1 sPw   Y  d S )Ntest_issue5137my_componentc                       s2   e Zd Z dfddZdd Zdd Zdd	 Zd
S )z#test_issue5137.<locals>.MyComponentall_categoriesc                 S   s   || _ || _|| _d S N)nlp
categoriesname)selfr   r   r    r   \/home/ubuntu/.local/lib/python3.10/site-packages/spacy/tests/pipeline/test_pipe_factories.py__init__   s   
z,test_issue5137.<locals>.MyComponent.__init__c                 S      d S r   r   r   docr   r   r   __call__!      z,test_issue5137.<locals>.MyComponent.__call__c                 [   r   r   r   )r   pathkwargsr   r   r   to_disk$   r    z+test_issue5137.<locals>.MyComponent.to_diskc                 [   r   r   r   )r   r!   cfgr   r   r   	from_disk'   r    z-test_issue5137.<locals>.MyComponent.from_diskN)__name__
__module____qualname__r   r   r#   r%   r   	pipe_namer   r   MyComponent   s
    r+   r   r   
componentsr   my_categoriesconfig)
r	   factoryr   add_piper   r   r#   spacyloadget_pipe)factory_namer+   r   r   tmpdir	overridesnlp2r   r)   r   r      s   
"r   c                  C   s   d} t | dtdtfdd}| tjv sJ t  }tt || W d    n1 s/w   Y  ||  | |j	v s@J |j
|  | ksIJ t | sPJ || sWJ || }||ksbJ || }||ksmJ d S )Ntest_componentr   returnc                 S      | S r   r   r   r   r   r   	component7   s   z/test_pipe_function_component.<locals>.component)r	   r>   r   r   	factoriespytestraises
ValueErrorr2   
pipe_namespipe_factoriesget_factory_metaget_pipe_metar5   create_pipe)r   r>   r   piper   r   r   test_pipe_function_component4   s"   


rI   c               	      sP  d} d}t | G dd d}G dd d t ||fdt f fdd	}t  }| |f| ffD ]s\}}|tjv s=J tt |||| W d    n1 sUw   Y  || ||jv sfJ |j	| |ksoJ t 
|svJ ||s}J ||}t||sJ t|jt sJ ||}t||sJ t|jt sJ q2d S )
Ntest_class_component1test_class_component2c                   @   0   e Zd ZdedefddZdedefddZd	S )
z2test_pipe_class_component_init.<locals>.Component1r   r   c                 S   
   || _ d S r   r   r   r   r   r   r   r   r   P      
z;test_pipe_class_component_init.<locals>.Component1.__init__r   r;   c                 S      |S r   r   r   r   r   r   r   S   r    z;test_pipe_class_component_init.<locals>.Component1.__call__Nr&   r'   r(   r	   strr   r   r   r   r   r   r   
Component1N   s    rT   c                   @   rL   )
z2test_pipe_class_component_init.<locals>.Component2r   r   c                 S   rM   r   rN   rO   r   r   r   r   W   rP   z;test_pipe_class_component_init.<locals>.Component2.__init__r   r;   c                 S   rQ   r   r   r   r   r   r   r   Z   r    z;test_pipe_class_component_init.<locals>.Component2.__call__NrR   r   r   r   r   
Component2V   s    rU   r   c                    s
    | |S r   r   r   r   rU   r   r   r1   ]      
z/test_pipe_class_component_init.<locals>.factory)r	   r1   r   r?   r@   rA   rB   r2   rC   rD   rE   rF   r5   
isinstancer   rG   )name1name2rT   r1   r   r   	ComponentrH   r   rW   r   test_pipe_class_component_initJ   s2   


r]   c                  C   s  d} t | G dd d}t| G dd d}t  }tt ||  W d    n1 s2w   Y  tt |j| dddd	 W d    n1 sQw   Y  tt |j| d
dddd	 W d    n1 sqw   Y  |	| }t
|jt sJ |jd
ksJ |jdksJ |jdu sJ |j| ksJ t }tt |j| dddd	 W d    n1 sw   Y  |j| d
ddd	 |	| }t
|jtsJ |jd
ksJ |jdksJ |jdu sJ d S )Ntest_class_component_configc                   @   8   e Zd ZdedededefddZdedefd	d
Z	dS )z3test_pipe_class_component_config.<locals>.Componentr   r   value1value2c                 S   s"   || _ || _|| _d| _|| _d S )NT)r   r`   ra   is_baser   r   r   r   r`   ra   r   r   r   r   x   s
   
z<test_pipe_class_component_config.<locals>.Component.__init__r   r;   c                 S   rQ   r   r   r   r   r   r   r      r    z<test_pipe_class_component_config.<locals>.Component.__call__N
r&   r'   r(   r	   rS   r   r   r   r   r   r   r   r   r   r\   v   s    
	r\   c                   @   r_   )z5test_pipe_class_component_config.<locals>.ComponentENr   r   r`   ra   c                 S   s   || _ || _|| _d| _d S )NF)r   r`   ra   rb   rc   r   r   r   r      s   
z>test_pipe_class_component_config.<locals>.ComponentEN.__init__r   r;   c                 S   rQ   r   r   r   r   r   r   r      r    z>test_pipe_class_component_config.<locals>.ComponentEN.__call__Nrd   r   r   r   r   ComponentEN   s    
re   10hello)r`   ra   r/   
   
wrong_name)r`   ra   r   TF)r	   r1   r   r@   rA   r   r2   warnsUserWarningr5   rY   r   r`   ra   rb   r   )r   r\   re   r   rH   nlp_enr   r   r    test_pipe_class_component_configs   sB   

rm   c                  C   sf   d} t | G dd d}t  }||  || }t|jt s#J |jdks*J |jdks1J d S )Ntest_class_component_defaultsc                	   @   sF   e Zd ZededfdedededefddZd	ed
efddZ	dS )z5test_pipe_class_component_defaults.<locals>.Componentrh   rg   r   r   r`   ra   c                 S   s   || _ || _|| _d S r   )r   r`   ra   rc   r   r   r   r      s   
z>test_pipe_class_component_defaults.<locals>.Component.__init__r   r;   c                 S   rQ   r   r   r   r   r   r   r      r    z>test_pipe_class_component_defaults.<locals>.Component.__call__N)
r&   r'   r(   r   r   r	   rS   r   r   r   r   r   r   r   r\      s    
r\   rh   rg   )r	   r1   r2   r5   rY   r   r`   ra   )r   r\   r   rH   r   r   r   "test_pipe_class_component_defaults   s   

ro   c                  C   s   d} dt dddddddd	}tj| |d
G dd d}t }||  || }t|jts3J |jdks:J t|jt	sBJ d S )Ntest_class_component_modelzspacy.TextCatEnsemble.v2zspacy.TextCatBOW.v3F   )@architecturesexclusive_classes
ngram_sizeno_output_layer)rr   tok2veclinear_modelrh   )modelr`   default_configc                   @   r_   )z2test_pipe_class_component_model.<locals>.Componentr   rx   r   r`   c                 S      || _ || _|| _|| _d S r   r   rx   r`   r   r   r   rx   r   r`   r   r   r   r      s   
z;test_pipe_class_component_model.<locals>.Component.__init__r   r;   c                 S   rQ   r   r   r   r   r   r   r      r    z;test_pipe_class_component_model.<locals>.Component.__call__N)
r&   r'   r(   r	   r   rS   r   r   r   r   r   r   r   r   r\      s    r\   )
r
   r	   r1   r2   r5   rY   r   r`   rx   r   )r   rz   r\   r   rH   r   r   r   test_pipe_class_component_model   s&   



r~   c                  C   sf  d} |  d}d|dddd}t j| |dG dd	 d	}t|d
tdtfdd}t  }d|dddd}|j| |d || }t|jt sLJ |j	dksSJ t|j
ts[J |j
jdkscJ t  }tt d|dddd}|j| |d W d    n1 sw   Y  tt d|dddd}|j| |d W d    d S 1 sw   Y  d S )N!test_class_component_model_customz.archrq   r   )rr   nOnI)r`   rx   ry   c                	   @   s@   e Zd ZedfdedededefddZded	efd
dZ	dS )z9test_pipe_class_component_model_custom.<locals>.Componentrh   r   rx   r   r`   c                 S   r{   r   r|   r}   r   r   r   r      s   
zBtest_pipe_class_component_model_custom.<locals>.Component.__init__r   r;   c                 S   rQ   r   r   r   r   r   r   r      r    zBtest_pipe_class_component_model_custom.<locals>.Component.__call__N)
r&   r'   r(   r   r	   r   rS   r   r   r   r   r   r   r   r\      s    
r\   r   r   c                 S   s
   t | |S r   )r   )r   r   r   r   r   make_custom_arch   rX   z@test_pipe_class_component_model_custom.<locals>.make_custom_arch   r   r/   linear20      ?g       @)r	   r1   r   architecturesr   r2   r5   rY   r   r`   rx   r   r   r@   rA   r   )r   archrz   r\   r   r   r0   rH   r   r   r   &test_pipe_class_component_model_custom   s0   

"r   c                  C   s   t t tjdtdtfdd} W d    n1 sw   Y  t t tjdtdtfdd}W d    n1 s=w   Y  t t tddtdtfdd	}W d    d S 1 saw   Y  d S )
Nfoobarc                 S   r   r   r   r   r   r   r   r   r>     r    z4test_pipe_factories_wrong_formats.<locals>.componentc                 S   r   r   r   r   r   r   r   factory1  r    z3test_pipe_factories_wrong_formats.<locals>.factory1 test_pipe_factories_missing_argsc                 S   r   r   r   r   r   r   r   factory2#  r    z3test_pipe_factories_wrong_formats.<locals>.factory2)r@   rA   rB   r	   r>   intrS   r1   )r>   r   r   r   r   r   !test_pipe_factories_wrong_formats  s   "r   c                  C   s\  t  } | jddd | d | dsJ | dsJ | ds$J | ds+J | ds2J | ds9J | dd | dsFJ | dsMJ tt | 	d W d   n1 sbw   Y  | 	d d| j
vssJ d| jvszJ tt | dd W d   n1 sw   Y  | dd | dsJ | djdksJ dS )zTest that component-specific meta and config entries are represented
    correctly and cleaned up when pipes are removed, replaced or renamed.nerner_componentr,   textcattcNparser)r	   r2   rE   rF   get_pipe_configrename_piper@   rA   rB   remove_pipe
_pipe_meta_pipe_configsreplace_piper1   rN   r   r   r   %test_pipe_factory_meta_config_cleanup'  s0   

r   c                  C   s@   d} t j| di iddt dtdtfdd}t  }||  dS )	zeTest that default config values can be empty dicts and that no config
    validation error is raised.&test_pipe_factories_empty_dict_defaultr   ry   r   r   c                 S   r   r   r   )r   r   r   r   r   r   r1   H  r    z7test_pipe_factories_empty_dict_default.<locals>.factoryN)r	   r1   rS   dictrG   )r   r1   r   r   r   r   r   B  s
   r   c                  C   s`  d} d}t j| dd d tj| dd d tj|dd d t | s&J t |r-J t| s4J t|r;J t| sBJ t|sIJ t  }||  dksVJ tt || W d	   n1 skw   Y  t }||  d
ks}J tt || W d	   n1 sw   Y  t }||  dksJ || dksJ d	S )zfTest that language sub-classes can have their own factories, with
    fallbacks to the base factories.specific_component1specific_component2c                   S      dS )Nbaser   r   r   r   r   <lambda>T      z7test_pipe_factories_language_specific.<locals>.<lambda>funcc                   S   r   )Nenr   r   r   r   r   r   U  r   c                   S   r   )Nder   r   r   r   r   r   V  r   r   Nr   r   )	r	   r>   r   r   has_factoryrG   r@   rA   rB   )rZ   r[   r   rl   nlp_der   r   r   %test_pipe_factories_language_specificO  s0   r   c                  C   s   t tjtsJ tt dtjd< W d   n1 sw   Y  t } t | jts-J t| js4J tt d| jd< W d   dS 1 sJw   Y  dS )z`Test that assigning directly to Language.factories is now invalid and
    raises a custom error.r   r   N)rY   r	   r?   r   r@   rA   NotImplementedErrorlenrN   r   r   r   test_language_factories_invalidl  s   "r   zweights,override,expectedar   bcgQ?)r   r   r   2   d   g{Gz?Q?ffffff?g333333?)r   r         ?)degq=
ףp?皙?(\?)r   r   r   r   r   i,  )r   r   皙?g333333?)r   r   r   r   gq=
ףp?        )r   r   f)r   r   r   r   r   c                 C   s.   t | |}t| dv sJ ||ksJ d S )N)gGz?r   r   )r   sumvalues)weightsoverrideexpectedresultr   r   r   -test_language_factories_combine_score_weightsy  s   
&r   c                  C   s  d} dd }ddd}dddd	}t j|  d
||d t j|  d||d t |  d
}|j|ks6J t |  d}|j|ksEJ t  }i |jd d< ||  d
 ||  d |jd }dddddd}|d |kstJ |j }	d|	d d d< d|	d d d< t	|	}|jd d }
dddddd}|
|ksJ |j }	d |	d d d< t	|	}|jd d }
d ddddd}|
|ksJ d S )Ntest_language_factories_scoresc                 S      dd S )Nc                 S   r<   r   r   r=   r   r   r   r     r   zBtest_language_factories_scores.<locals>.<lambda>.<locals>.<lambda>r   rV   r   r   r   r         z0test_language_factories_scores.<locals>.<lambda>r   )a1a2r   r   r   )b1b2b31)default_score_weightsr   2trainingscore_weightsg      ?gffffff?g?)r   r   r   r   r   r   r   g?r   gQ?r   g?r   )
r	   r1   rE   r   _configr2   r0   copyr   from_config)r   r   weights1weights2meta1meta2r   r$   expected_weightsr0   r   r   r   r   r   r     s<   





r   c                  C   s   t  } | jddd t  }tt |jddd W d   n1 s$w   Y  |jd| d d|jv s7J tt |jd| d W d   dS 1 sOw   Y  dS )z+Test adding components from a source model.tagger	my_taggerr,   en_core_web_smsourceNcustom)r   r2   r@   rA   rB   rC   KeyError)
source_nlpr   r   r   r   test_pipe_factories_from_source  s   "r   c                     s   G dd dt j tdG  fdddt } t  }|d |  }|jd|d d|jv s1J t }|jd|d d|jv sBJ t  }|jj	d |jjj
d	g d
d tt |jd|d W d    d S 1 sow   Y  d S )Nc                   @   s   e Zd ZeddgZdS )zPtest_pipe_factories_from_source_language_subclass.<locals>.CustomEnglishDefaultsr   stopN)r&   r'   r(   set
stop_wordsr   r   r   r   CustomEnglishDefaults  s    r   	custom_enc                       s   e Zd ZdZ ZdS )zHtest_pipe_factories_from_source_language_subclass.<locals>.CustomEnglishr   N)r&   r'   r(   langDefaultsr   r   r   r   CustomEnglish  s    r   r   r   )rq      cat)rq   r      r   )vector)r   r   r   	languagesr2   rC   r   vocabvectorsresizeaddr@   rj   rk   )r   r   r   r   r   r   1test_pipe_factories_from_source_language_subclass  s"   
"r   c                  C   s   d} t j| ddiddtfdd}t }|d |j| ddid	 t }|j| |d
 | |jv s4J || jd dks@J |jd |  }|d | ksOJ |d dksWJ dS )zBTest adding components from a source model with custom components.&test_pipe_factories_from_source_customargrg   ry   c                 S   r   )Nc                 S   r<   r   r   r=   r   r   r   r     r   zNtest_pipe_factories_from_source_custom.<locals>.test_factory.<locals>.<lambda>r   r   r   r   r   r   r   test_factory     z<test_pipe_factories_from_source_custom.<locals>.test_factoryr   worldr/   r   r-   r1   N)	r	   r1   rS   r   r2   rC   rF   rz   r0   )r   r   r   r   r0   r   r   r   r     s   
r   c            
      C   s4  d} t j| ddiddtfdd}t }|d |j| ddd	id
 dddgd}t #}|| ddit|ddd}||d}t|}W d    n1 sTw   Y  |jddgksbJ |j	d| dkslJ |
d}|j| ksxJ |jd dksJ |jd d }	|	d | ksJ |	d d	ksJ d S )N&test_pipe_factories_from_source_configr   rg   ry   c                 S   r   )Nc                 S   r<   r   r   r=   r   r   r   r     r   zNtest_pipe_factories_from_source_config.<locals>.test_factory.<locals>.<lambda>r   r   r   r   r   r   	  r   z<test_pipe_factories_from_source_config.<locals>.test_factoryr   yolor   )r   r0   r   r   r   r   pipeliner1   )r   r>   )r   r   r   r-   r-   )r	   r1   rS   r   r2   r   r#   r   rC   rD   rF   rz   r0   )
r   r   r   dest_nlp_cfgtempdirdest_components_cfgdest_configr   metar0   r   r   r   r     s.   



r   c                   @   s   e Zd Zdd Zdd ZdS )PipeFactoriesIdempotentc                 C   r   r   r   rO   r   r   r   r   $  r   z PipeFactoriesIdempotent.__init__c                 C   r   r   r   r   r   r   r   r   &  r   z PipeFactoriesIdempotent.__call__N)r&   r'   r(   r   r   r   r   r   r   r  #  s    r  zi,func,func2c                 C   r   )Nc                 S   r<   r   r   r=   r   r   r   r   ,  r   z<lambda>.<locals>.<lambda>r   rV   r   r   r   r   ,  r   r   c                 C   r<   r   r   r=   r   r   r   r   ,  r   rq   c                 C   s   d|  }t dD ]	} tj||d q	t }|| tj||d | d}t dD ]	} tj||d q+t }|| tj||d dS )zCheck that decorator can be run multiple times if the function is the
    same. This is especially relevant for live reloading because we don't
    want spaCy to raise an error if a module registering components is reloaded.
    )test_pipe_factories_decorator_idempotent_   r   r   N)ranger	   r1   r2   r>   )ir   func2r   r   r[   r   r   r   (test_pipe_factories_decorator_idempotent)  s   



r  c                  C   s~   d} dd }t j| |d d| gd| d| iid}t|}|j| gks'J || }|d| ik |j|  d| iks=J d	S )
z~Test that the extra values we temporarily add to component config
    blocks/functions are removed and not copied around.
    'test_pipe_factories_config_excludes_nlpc                 S   r   )Nc                 S   r<   r   r   r=   r   r   r   r   J  r   zKtest_pipe_factories_config_excludes_nlp.<locals>.<lambda>.<locals>.<lambda>r   rV   r   r   r   r   J  r   z9test_pipe_factories_config_excludes_nlp.<locals>.<lambda>r   r   r   r1   r  N)r	   r1   r   r   rC   r   r   )r   r   r0   r   pipe_cfgr   r   r   r  E  s   



r  )3r@   pydantic.v1r   r   ImportErrorpydantic	thinc.apir   r   r   r3   spacy.lang.der   spacy.lang.enr   spacy.languager	   spacy.pipeline.tok2vecr
   spacy.tokensr   
spacy.utilr   r   r   utilr   markissuer   rI   r]   rm   ro   r~   r   r   r   r   r   r   parametrizer   r   r   r   r   r   r  r  r  r   r   r   r   <module>   s    

)9#,"" 


%#
