o
    iT                     @   s  d dl Z d dlZd dlmZ d dlmZ d dlmZmZm	Z	m
Z
mZmZmZmZmZmZmZ z d dlmZmZmZmZmZmZmZmZmZmZ d dlmZ W n' eyu   d dl mZmZmZmZmZmZmZmZmZmZ d dl!mZ Y nw d dl"m#Z#m$Z$m%Z% d dl&m'Z' d	d
l(m)Z) d	dl*m+Z+ d	dl,m-Z- d	dl.m/Z/ erd	dl0m1Z1 d	dl2m3Z3 d	dl4m5Z5 edZ6ee	ee6 geee6  f e'f Z7ee	de8ged f e'f Z9ee	dgee	e
e8ef gdf e	f f e'f Z:dee de
e8ef dee8 fddZ;G dd dZ<G dd dZ=e> dddd e	d!ee8 d"e8d#e?def
d$d%Z@dd&d'd(d e	d)e
e8ef d*ee8 d"e8d!ee8 de
e8ef fd+d,ZAdeBdee8 fd-d.ZCG d/d0 d0eZDG d1d2 d2eZEG d3d4 d4e8eZFG d5d6 d6eZGeeFeGf ZHeeDef ZIeeEeef ZJeeDeEe8eKeLeBe?f ZMe+d7 ZNG d8d9 d9eZOG d:d; d;eZPG d<d= d=eZQG d>d? d?eZRG d@dA dAeZSG dBdC dCeZTG dDdE dEeZUG dFdG dGeZVG dHdI dIeZWeSeReUeVdJZXG dKdL dLeZYG dMdN dNeZZG dOdP dPeZ[G dQdR dReZ\dS )S    N)defaultdict)Enum)TYPE_CHECKINGAnyCallableDictIterableListOptionalTupleTypeTypeVarUnion)
	BaseModelConstrainedStrField
StrictBoolStrictFloat	StrictInt	StrictStrValidationErrorcreate_model	validator)ModelMetaclass)ConfigValidationErrorModel	Optimizer)Promise   )NAMES)Literal)Lookups)is_cython_func)Language)Example)VocabItemTr#   r$   schemaobjreturnc                 C   s   z
| di | g W S  t yJ } z4| }tt}|D ]}ddd |dg D }|| |d qdd | D W  Y d}~S d}~ww )	zValidate data against a given pydantic schema.

    obj (Dict[str, Any]): JSON-serializable data to validate.
    schema (pydantic.BaseModel): The schema to validate against.
    RETURNS (List[str]): A list of error messages, if available.
    z -> c                 S   s   g | ]}t |qS  )str).0pr*   r*   A/home/ubuntu/.local/lib/python3.10/site-packages/spacy/schemas.py
<listcomp>T   s    zvalidate.<locals>.<listcomp>locmsgc                 S   s&   g | ]\}}d | dd | qS )[z] z, )join)r,   r0   r1   r*   r*   r.   r/   V   s   & Nr*   )r   errorsr   listr3   getappenditems)r'   r(   er4   dataerrorerr_locr*   r*   r.   validateF   s   r=   c                   @      e Zd ZdZdZdS )ArgSchemaConfigforbidTN__name__
__module____qualname__extraarbitrary_types_allowedr*   r*   r*   r.   r?   \       r?   c                   @   r>   )ArgSchemaConfigExtrar@   TNrA   r*   r*   r*   r.   rH   a   rG   rH   ArgModelT)excludenamestrictfuncrJ   rK   rL   c                C   s   i }zt | }W n ty   t|td Y S w d}|j D ]6}|j|v r(q |j|j	kr1d}q |j
|jkr:|j
nt}t| rBdnd}	|j|jkrM|jn|	}
||
f||j< q |o[| }|r`tnt|d< t|fi |S )a  Generate a pydantic model for function arguments.

    func (Callable): The function to generate the schema for.
    exclude (Iterable[str]): Parameter names to ignore.
    name (str): Name of created model class.
    strict (bool): Don't allow extra arguments if no variable keyword arguments
        are allowed on the function.
    RETURNS (ModelMetaclass): A pydantic model.
    )
__config__FTN.rN   )inspect	signature
ValueErrorr   rH   
parametersvaluesrK   kindVAR_KEYWORD
annotationemptyr   r"   defaultr?   )rM   rJ   rK   rL   sig_argssighas_variableparamrV   default_emptyrX   	is_strictr*   r*   r.   get_arg_modelf   s(   

r_    )get_examplesnlp)sectionrK   rJ   settingsrc   c          	   
   C   sp   t | |dd}z
|d	i | W S  ty7 } z|sdnd| }d| d}t|| ||ddd}~ww )
a  Validate initialization settings against the expected arguments in
    the method signature. Will parse values if possible (e.g. int to string)
    and return the updated settings dict. Will raise a ConfigValidationError
    if types don't match or required values are missing.

    func (Callable): The initialize method of a given component etc.
    settings (Dict[str, Any]): The settings from the respective [initialize] block.
    section (str): Initialize section, for error message.
    name (str): Name of the block in the section.
    exclude (Iterable[str]): Parameter names to exclude from schema.
    RETURNS (Dict[str, Any]): The validated settings.
    InitArgModel)rJ   rK   
initializezinitialize.z-Error validating initialization settings in [])titler4   configparentNr*   )r_   dictr   r   r4   )	rM   rd   rc   rK   rJ   r'   r9   blockrh   r*   r*   r.   validate_init_settings   s   rm   c                    s\   dd  t | tr'g }| D ]}t |tr fdd| D }|| q|} ttd| iS )Nc                 S   s"   t | tr| ttk rt|  S | S N)
isinstanceintlenr   )kr*   r*   r.   <lambda>   s   " z(validate_token_pattern.<locals>.<lambda>c                    s   i | ]	\}} ||qS r*   r*   )r,   rr   vget_keyr*   r.   
<dictcomp>   s    z*validate_token_pattern.<locals>.<dictcomp>pattern)ro   r5   rk   r8   r7   r=   TokenPatternSchema)r(   	convertedrx   r*   ru   r.   validate_token_pattern   s   

r{   c                   @   s   e Zd ZU edddZeeed f  ed< edddZ	ee
e  ed< edddZee
e  ed< edd	dZee
e  ed
< edddZee
e  ed< edddZee
e  ed< edddZeeed f  ed< edddZeeed f  ed< edddZeeed f  ed< edddZeeed f  ed< edddZeeed f  ed< edddZeeed f  ed< edddZeeed f  ed< edddZeeed f  ed< edddZeeed f  ed < edd!dZeeed f  ed"< G d#d$ d$Zed%d&d&d&d'd(d) ZdS )*TokenPatternStringNregexaliasREGEXinINnot_inNOT_IN	is_subset	IS_SUBSETis_supersetIS_SUPERSET
intersects
INTERSECTSfuzzyFUZZYfuzzy1FUZZY1fuzzy2FUZZY2fuzzy3FUZZY3fuzzy4FUZZY4fuzzy5FUZZY5fuzzy6FUZZY6fuzzy7FUZZY7fuzzy8FUZZY8fuzzy9FUZZY9c                   @   r>   )zTokenPatternString.Configr@   TNrB   rC   rD   rE   allow_population_by_field_namer*   r*   r*   r.   Config   rG   r   *Tpre	each_itemallow_reusec                 C      |d u rt d|S NzNone / null is not allowedrQ   clsrt   r*   r*   r.   raise_for_none      z!TokenPatternString.raise_for_none)rB   rC   rD   r   r   r
   r   r   __annotations__r   r	   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r*   r*   r*   r.   r|      sL   
   r|   c                   @   s  e Zd ZU edddZee ed< edddZee	e
  ed< edddZee	e
  ed< edd	dZee	e
  ed
< edddZee	e
  ed< edddZee	e
  ed< edddZeee
ef  ed< edddZeee
ef  ed< edddZeee
ef  ed< edddZeee
ef  ed< edddZeee
ef  ed< edddZeee
ef  ed< G dd dZedddddd d! ZdS )"TokenPatternNumberNr}   r~   r   r   r   r   r   r   r   r   r   r   r   z==EQz!=NEQz>=GEQz<=LEQ>GT<LTc                   @   r>   )zTokenPatternNumber.Configr@   TNr   r*   r*   r*   r.   r      rG   r   r   Tr   c                 C   r   r   r   r   r*   r*   r.   r     r   z!TokenPatternNumber.raise_for_none)rB   rC   rD   r   r   r
   r   r   r   r	   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r*   r*   r*   r.   r      s    
       r   c                   @   sN   e Zd ZU edZeed< edZeed< edZeed< edZeed< d	S )
TokenPatternOperatorSimple+plusr   star?question!exclamationN)	rB   rC   rD   r   r   r   r   r   r   r*   r*   r*   r.   r     s
   
 r   c                   @   s   e Zd ZedZdS )TokenPatternOperatorMinMaxz^({\d+}|{\d+,\d*}|{\d*,\d+})$N)rB   rC   rD   recompiler}   r*   r*   r*   r.   r     s    r   )r`   IOBr   r         c                   @   s  e Zd ZU dZee ed< dZee ed< dZee ed< dZ	ee ed< dZ
ee ed< dZee ed< dZee ed< dZee ed	< dZee ed
< dZee ed< dZee ed< dZee ed< dZee ed< dZee ed< dZee ed< dZee ed< dZee ed< dZee ed< dZee ed< dZee ed< dZee ed< dZee ed< dZee ed< dZ ee ed< dZ!ee ed< dZ"ee ed< dZ#ee ed< dZ$ee ed< dZ%ee ed< dZ&ee ed< dZ'ee ed < dZ(ee ed!< dZ)ee ed"< dZ*ee ed#< dZ+ee ed$< dZ,ee- ed%< e.dd&d'Z/ee0e1e2f  ed(< G d)d* d*Z3e4d+d,d,d-d.d/ Z5dS )0TokenPatternNorthtextlowerpostagmorphdeplemmashapeent_typeent_iobent_id	ent_kb_idnormlengthspacyis_alphais_asciiis_digitis_loweris_upperis_titleis_punctis_space
is_bracketis_quoteis_left_punctis_right_punctis_currencyis_stopis_sent_start
sent_startlike_numlike_url
like_emailop_r~   
underscorec                   @   s   e Zd ZdZdZdd ZdS )zTokenPattern.Configr@   Tc                 C   s   |   S rn   )upper)valuer*   r*   r.   rs   I  s    zTokenPattern.Config.<lambda>N)rB   rC   rD   rE   r   alias_generatorr*   r*   r*   r.   r   F  s    r   r   T)r   r   c                 C   r   r   r   r   r*   r*   r.   r   K  r   zTokenPattern.raise_for_none)6rB   rC   rD   r   r
   StringValuer   r   r   r   r   r   r   r   r   r   r   IobValuer   r   r   r   NumberValuer   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   TokenPatternOperatorr   r   r   r   UnderscoreValuer   r   r   r*   r*   r*   r.   r     sR   
  r   c                   @   s4   e Zd ZU edddZee ed< G dd dZdS )ry   .r   )	min_itemsrx   c                   @      e Zd ZdZdS )zTokenPatternSchema.Configr@   NrB   rC   rD   rE   r*   r*   r*   r.   r   U      r   N)	rB   rC   rD   r   rx   r	   r   r   r   r*   r*   r*   r.   ry   R  s   
 ry   c                   @   s  e Zd ZU edddZeed< edddZeed< edddZeed< ed	d
dZ	eed< edddZ
eed< eg ddZee ed< eg ddZee ed< ed	ddZeed< ed	ddZeed< ed	ddZeed< ed	ddZeed< ed	ddZeed< edddZeeee eeeef  f  ed< ei d dZeeef ed!< ei d"dZeeee f ed#< ei d$dZeeef ed%< ed	d&dZeed'< dS )(ModelMetaSchema.z#Two-letter language code, e.g. 'en'rh   langz
Model namerK   zModel versionversionr`   z#Compatible spaCy version identifierspacy_versionr   z9Name of parent spaCy package, e.g. spacy or spacy-nightlyparent_packagezIAdditional Python package dependencies, used for the Python package setuprequirementszNames of pipeline componentspipelinezModel descriptiondescriptionzModel licenselicensezModel author nameauthorzModel author emailemailzModel author URLurlNzTraining data sourcessourceszIncluded word vectorsvectorsz)Component labels, keyed by component namelabelszAccuracy and speed numbersperformancezCommit of spaCy version usedspacy_git_version)rB   rC   rD   r   r   r   r   rK   r   r   r   r  r	   r  r  r  r  r  r  r  r
   r   r   r+   r	  r   r
  r  r  r*   r*   r*   r.   r   \  s$   
 0 r   c                   @   s  e Zd ZU edddZeed< edddZeed< edddZe	ed< edd	dZ
eed
< edddZeed< edddZeed< edddZeed< edddZeed< edddZee ed< edddZee ed< edddZeed< edddZeeeeeef  f ed< edddZeed< edddZeed< edddZee ed < edd!dZee ed"< edd#dZ ee!d$gd$f  ed%< edd&dZ"ee!d$eee#f gd'f  ed(< G d)d* d*Z$d'S )+ConfigSchemaTraining.z"Path in the config to the dev datar   
dev_corpus'Path in the config to the training datatrain_corpusBatcher for the training databatcherDropout ratedropoutzBHow many steps to continue without improvement in evaluation scorepatience%Maximum number of epochs to train for
max_epochsz+Maximum number of update steps to train for	max_stepsz-How often to evaluate during training (steps)eval_frequencyzRandom seedseedz$Memory allocator when running on GPUgpu_allocatorz,Whether to divide the batch up into substepsaccumulate_gradientz<Scores to report and their weights for selecting final modelscore_weightsThe optimizer to use	optimizerz%The logger to track training progressloggerz=Pipeline components that shouldn't be updated during trainingfrozen_componentsz?Pipeline components that should set annotations during trainingannotating_componentszPOptional callback to modify nlp object after training, before it's saved to diskr#   before_to_diskzDOptional callback that is invoked at the start of each training stepNbefore_updatec                   @   r>   )zConfigSchemaTraining.Configr@   TNrA   r*   r*   r*   r.   r     rG   r   )%rB   rC   rD   r   r  r   r   r  r  Batcherr  r   r  r   r  r  r  r  r
   r  r  r  r   r   r  r   r   Loggerr!  r	   r+   r"  r#  r   r$  r   r   r*   r*   r*   r.   r  z  s(   
 (",r  c                   @   s  e Zd ZU edddZeed< edddZee ed< edddZ	ee ed< edd	dZ
eed
< edddZeeed ged f  ed< edddZeedgdf  ed< edddZeedgdf  ed< edddZee ed< edddZeed< G dd dZdS )ConfigSchemaNlp.zThe base language to user   r   z%The pipeline component names in orderr  z)Pipeline components to disable by defaultdisabledzThe tokenizer to use	tokenizerz@Optional callback to modify Language class before initializationr#   before_creationz\Optional callback to modify nlp object after creation and before the pipeline is constructedafter_creationzHOptional callback to modify nlp object after the pipeline is constructedafter_pipeline_creationzDefault batch size
batch_sizezVectors implementationr	  c                   @   r>   )zConfigSchemaNlp.Configr@   TNrA   r*   r*   r*   r.   r     rG   r   N)rB   rC   rD   r   r   r   r   r  r	   r(  r)  r   r*  r
   r   r+  r,  r-  rp   r	  r   r*   r*   r*   r.   r'    s   
 *""r'  c                   @   s   e Zd ZG dd dZdS )ConfigSchemaPretrainEmptyc                   @   r   )z ConfigSchemaPretrainEmpty.Configr@   Nr   r*   r*   r*   r.   r     r   r   N)rB   rC   rD   r   r*   r*   r*   r.   r.    s    r.  c                   @   s   e Zd ZU edddZeed< edddZeed< edddZ	e
e ed< edd	dZe
e ed
< edddZeed< edddZeed< edddZeed< edddZeed< edddZeed< edddZedegef ed< G dd dZdS )ConfigSchemaPretrain.r  r   r  r  r  zASaving additional temporary model after n batches within an epochn_save_everyz Saving model after every n epochn_save_epochr  r  r  corpusr  r  z'Component to find the layer to pretrain	componentz(Layer to pretrain. Whole model if empty.layerz2A function that creates the pretraining objective.r%   	objectivec                   @   r>   )zConfigSchemaPretrain.Configr@   TNrA   r*   r*   r*   r.   r     rG   r   N)rB   rC   rD   r   r  r   r   r  r   r0  r
   r1  r  r   r2  r   r  r%  r3  r+   r4  r5  r   r   r   r*   r*   r*   r.   r/    s   
  r/  c                   @   s   e Zd ZU edddZee ed< edddZee	 ed< edddZ
ee ed< edd	dZee ed
< edddZeeef ed< edddZeeeeef f ed< edddZeedgdf  ed< edddZeedgdf  ed< G dd dZdS )ConfigSchemaInit.z&Path to JSON-formatted vocabulary filer   
vocab_dataz-Vocabulary lookups, e.g. lexeme normalizationlookupszPath to vectorsr	  z"Path to pretrained tok2vec weightsinit_tok2vecz0Arguments to be passed into Tokenizer.initialize)helpr)  zYArguments for TrainablePipe.initialize methods of pipeline components, keyed by component
componentsz<Optional callback to modify nlp object before initializationr#   before_initz;Optional callback to modify nlp object after initialization
after_initc                   @   r>   )zConfigSchemaInit.Configr@   TNrA   r*   r*   r*   r.   r     rG   r   N)rB   rC   rD   r   r7  r
   r   r   r8  r!   r	  r9  r)  r   r   r;  r<  r   r=  r   r*   r*   r*   r.   r6    s   
 $""r6  c                   @   sp   e Zd ZU eed< eed< i Zeee	f ed< e
ee
eef f ed< e
eef ed< eed< G dd dZd	S )
ConfigSchematrainingrb   pretrainingr;  corporarf   c                   @   r>   )zConfigSchema.ConfigallowTNrA   r*   r*   r*   r.   r     rG   r   N)rB   rC   rD   r  r   r'  r@  r   r/  r.  r   r+   r   Readerr6  r   r*   r*   r*   r.   r>    s   
 r>  )rb   r?  r@  rf   c                   @   s   e Zd ZU eed< eed< dS )RecommendationTrfItemrK   size_factorN)rB   rC   rD   r+   r   rp   r*   r*   r*   r.   rD       
 rD  c                   @   s   e Zd ZU eed< eed< dS )RecommendationTrf
efficiencyaccuracyN)rB   rC   rD   rD  r   r*   r*   r*   r.   rG    rF  rG  c                   @   s:   e Zd ZU dZee ed< dZee ed< dZ	e
ed< dS )RecommendationSchemaNword_vectorstransformerThas_letters)rB   rC   rD   rK  r
   r+   r   rL  rG  rM  boolr*   r*   r*   r.   rJ    s   
 rJ  c                	   @   sl  e Zd ZU dZedddZeeee	f  e
d< edddZeeeeeeef f   e
d< edddZeeeeef   e
d	< ed
ddZee
d< edddZeeeeeeeeef f  f  e
d< ed
ddZeeeeeef f  e
d< eddddZeeeef  e
d< edddZeeeeeeef  f  e
d< edddZeeeeeeef  f  e
d< dS )DocJSONSchemazB
    JSON/dict format for JSON representation of Doc objects.
    Nz+Categories with corresponding probabilitiesr   catszInformation on entitiesentsz+Indices of sentences' start and end indicessents.zDocument textr   z2Span information - end/start indices, label, KB IDspansz*Token information - ID, start, annotationstokensz4Any custom data stored in the document's _ attributer   )rh   r   underscore_docz1Any custom data stored in the token's _ attributeunderscore_tokenz0Any custom data stored in the span's _ attributeunderscore_span)rB   rC   rD   __doc__r   rP  r
   r   r   r   r   rQ  r	   r   r   rR  r   rS  rT  rU  r   rV  rW  r*   r*   r*   r.   rO    s<   
 &
"&*rO  )]rO   r   collectionsr   enumr   typingr   r   r   r   r   r	   r
   r   r   r   r   pydantic.v1r   r   r   r   r   r   r   r   r   r   pydantic.v1.mainr   ImportErrorpydanticpydantic.main	thinc.apir   r   r   thinc.configr   attrsr   compatr    r8  r!   utilr"   languager#   r?  r$   vocabr%   r&   r%  r+   rC  r&  r=   r?   rH   tuplerN  r_   rm   r5   r{   r|   r   r   r   r   r   r   rp   floatr   r   r   ry   r   r  r'  r.  r/  r6  r>  CONFIG_SCHEMASrD  rG  rJ  rO  r*   r*   r*   r.   <module>   s    400"0&
2


"/3

