o
    ih                  "   @   s$  d dl mZmZmZmZmZmZmZmZ d dl	m
Z
 d dlmZmZ ddlmZmZ ddlmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZmZ ddlmZmZm Z  ddl!m"Z"m#Z# ddl$m%Z%m&Z& ddl'm(Z( ddl)m*Z*m+Z+ ddl,m-Z-m.Z. ddl/m0Z0m1Z1 ddl2m3Z3 ddl4m5Z5m6Z6 ddl7m8Z8m9Z9 ddl:m;Z< ddl:m=Z=m>Z>m?Z? ddl@mAZAmBZBm;Z;mCZCmDZD ddlEmFZFmGZG ddlHmIZImJZJ ddlKmLZLmMZM ddlNmOZOmPZP ddlQmRZR ddlSmTZT ddlUmVZV daWdd"d#ZXd$ed%eYd&eeeY  d'eZd(ee f
d)d*Z[d$ed%eYd+eZd(ee fd,d-Z\d!d.d$ed%eYd/e
d0eeY d1e]d2eZd3eZd4e]d5eeeTgee f d6eeeeT geee  f d7eeVe]gef d'eZd(ee d8eZd9e]d:ee^ f d;d<Z_d$ed/ee
 d%eYd=eYd'eZd(ee fd>d?Z`d$ed%eYd/e
eeR ee f d:e^d(ee d eJfd@dAZad d dBd$ed%eYdCe]dDe]fdEdFZbd$ed%eYdGeeYef dHeZfdIdJZcd$ed%eYd/e
d ePfdKdLZdd$ed%eYdMeDd/e
eeeR ef ef dNeYd(ee d:e^dOee] d eCfdPdQZed$ed%eYdMeDd/e
eeeR ef ef dNeYdRe^dSeZd(ee d eCfdTdUZfd$ed%eYdVeee]eYf  dWed+eZdXeZd(ee dYeYfdZd[Zgd$ed%eYdVeee]eYf  dWed+eZdXeZdYeYd(ee fd\d]Zhd$ed%eYdNeeY d^eeeeT eeT geeT f  d_eZd`eeeT eeT geeT f dVeee]eYf  dWed+eZd'eZd(ee fdadbZid$ed%eYd/e
dceeY dde]d'eZdee]d(ee fdfdgZjd$ed%eYd/e
eeR ee f d:e^d(ee d eMfdhdiZkd$ed%eYd/e
eeR ef dNeYd:e^djee] dCee] d(ee d e9fdkdlZld$ed%eYd/e
dmee dne]doeeY d(ee fdpdqZmd$ed%eYd/e
dmee dne]dre]dse^dte^doeeY d(ee fdudvZnd$ed%eYd/e
dmee dne]dweZdxe]d(ee fdydzZod$ed%eYd/e
dmee dne]dweZdxe]dre]dse^dte^d(ee fd{d|Zpd$ed%eYd/e
d'eZd(ee d}eYd~e^fddZqd$ed%eYd/e
deer deYf
ddZsd$ed/e
d%eYd'eZdeZd~e^d(ee fddZtd$ed%eYd/e
d'eZd(ee f
ddZud!S )    )AnyCallableDictIterableListOptionalTupleUnion)Model)Floats2dRagged   )	CandidateKnowledgeBase)Language)TransitionSystem)AttributeRuler)DEFAULT_PARSER_MODELDependencyParser)"DEFAULT_EDIT_TREE_LEMMATIZER_MODELEditTreeLemmatizer)DEFAULT_NEL_MODELEntityLinkerEntityLinker_v1)DEFAULT_ENT_ID_SEPEntityRuler)
DocCleanerTokenSplitter)
Lemmatizer)DEFAULT_MORPH_MODELMorphologizer)DEFAULT_MT_MODELMultitaskObjective)DEFAULT_NER_MODELEntityRecognizerSentencizer)DEFAULT_SENTER_MODELSentenceRecognizer)DEFAULT_SPAN_FINDER_MODEL
SpanFinder)DEFAULT_SPANS_KEY)	SpanRulerprioritize_existing_ents_filterprioritize_new_ents_filter)DEFAULT_SPANCAT_MODEL!DEFAULT_SPANCAT_SINGLELABEL_MODELr+   SpanCategorizer	Suggester)DEFAULT_TAGGER_MODELTagger)DEFAULT_SINGLE_TEXTCAT_MODELTextCategorizer)DEFAULT_MULTI_TEXTCAT_MODELMultiLabel_TextCategorizer)DEFAULT_TOK2VEC_MODELTok2Vec)Doc)Span)VocabFreturnNc                   C   s  t rdS tjddddiddt tjdg d	d
gtg ddddddiddiddidddidddddddddt tjdg ddddiddtddidddddddt tjd d!gdd"ddd#id$d%didt tjd&d'gdt	dd(id)ddddddddddd*
dt
 tjd+d,d-d.dd/t tjd0ddd1dd2dt tjd3d4gd5tid6t tjd7d8gtddd9id:dddd;dt tjd<d=d>gtdddd?idd@dAdAddBdt tjdCdDgdAtdtdEg dFdGddHidIddddJdt tjdKdDgttddEg dFdGddHiddLddddJdt tjdMdNgdddddidOddidPddddddt tjdQdDgtddddRidddidddStdTdU	dVt dWddVt dXddVt dYddVt dZdidt tjd[d!gg td\d]dddd#id^d%did_t tjd`d'gdAtddaid)ddddddddddd*
dt tjdbdDgdAttd,dddcidddVt dWddVt dXddVt dYdidt  tjdeg dfddgt!dddhididdddddt" tjdjg dfddgt!dkdAdldddhidmddddddt# tjdng doddgddpt$ddqidrdAdAdddddsdt% tjdtg doddgddpdudvdAt$ddqidw	dAdAdddddsdt& tjdxdygt'dddzid{dd|dddddd}dt( tjd~ddt)ddt* tjdd8dgdddd9iddddd;dt+ da dS )zRegister all factories with the registry.

    This function registers all pipeline component factories, centralizing
    the registrations that were previously done with @Language.factory decorators.
    Nattribute_rulerF@scorerszspacy.attribute_ruler_scorer.v1validatescorer)default_configentity_linker)doc.ents	doc.sentstoken.ent_iobtoken.ent_typeztoken.ent_kb_idr   T@   @misczspacy.CandidateGenerator.v1z spacy.CandidateBatchGenerator.v1zspacy.EmptyKB.v2zspacy.entity_linker_scorer.v1   )modellabels_discardn_sents
incl_priorincl_contextentity_vector_lengthget_candidatesget_candidates_batchgenerate_empty_kb	overwriterC   use_gold_entscandidates_batch_size	thresholdg      ?)nel_micro_fnel_micro_rnel_micro_p)requiresassignsrD   default_score_weightsentity_ruler)rF   rI   rH   zspacy.levenshtein_compare.v1zspacy.entity_ruler_scorer.v1phrase_matcher_attrmatcher_fuzzy_comparerB   overwrite_ents
ent_id_seprC   g        )ents_fents_pents_rents_per_type)r^   rD   r_   
lemmatizerztoken.lemmalookupzspacy.lemmatizer_scorer.v1)rM   moderV   rC   	lemma_acctextcatzdoc.catszspacy.textcat_scorer.v2)rY   rM   rC   )

cats_scorecats_score_desccats_micro_pcats_micro_rcats_micro_fcats_macro_pcats_macro_rcats_macro_fcats_macro_auccats_f_per_typetoken_splitter   
   
min_lengthsplit_length)rD   retokenizesdoc_cleaner)tensorz
_.trf_data)attrssilenttok2vecz
doc.tensorrM   )r^   rD   sentertoken.is_sent_startzspacy.senter_scorer.v1)rM   rV   rC   )sents_fsents_psents_rmorphologizerztoken.morphz	token.poszspacy.morphologizer_scorer.v1)rM   rV   extendrC   label_smoothingg      ?)pos_acc	morph_accmorph_per_featspancatz	doc.spanszspacy.ngram_suggester.v1)rL   r      )rK   sizeszspacy.spancat_scorer.v1)rY   	spans_keymax_positiverM   	suggesterrC   )
spans_sc_f
spans_sc_p
spans_sc_rspancat_singlelabel)r   rM   negative_weightr   rC   allow_overlapfuture_entity_rulerrF   
__unused__)rb   rB   rd   rC   re   rc   
span_rulerz#spacy.first_longest_spans_filter.v1z)spacy.overlapping_labeled_spans_scorer.v1)r@   r   	r   spans_filterannotate_entsents_filterrb   rc   rB   rV   rC   spans__f_p_r	_per_typetrainable_lemmatizerorthr   )rM   backoffmin_tree_freqrV   top_krC   )r^   r]   rD   r_   textcat_multilabelz"spacy.textcat_multilabel_scorer.v2span_finderzspacy.span_finder_scorer.v1)rY   rM   r   
max_lengthr}   rC   ner)rF   rH   rI   d   zspacy.ner_scorer.v1)movesupdate_with_oracle_cut_sizerM   incorrect_spans_keyrC   beam_nerg{Gz?    )r   r   rM   beam_densitybeam_update_prob
beam_widthr   rC   parser)z	token.depz
token.headr   rG      zspacy.parser_scorer.v1)r   r   learn_tokensmin_action_freqrM   rC   )dep_uasdep_lasdep_las_per_typer   r   r   beam_parser   g-C6?)	r   r   r   r   r   r   r   rM   rC   taggerz	token.tagzspacy.tagger_scorer.v1!)rM   rV   rC   
neg_prefixr   )tag_accr   tag_micro_ptag_micro_rtag_micro_fnn_labellerdep_tag_offset)labelstargetrM   sentencizerrG   punct_charsrV   rC   ),FACTORIES_REGISTEREDr   factorymake_attribute_rulerr   make_entity_linkerr   make_entity_rulermake_lemmatizerr5   make_textcatmake_token_splittermake_doc_cleanerr9   make_tok2vecr'   make_senterr   make_morphologizerr+   r/   make_spancatr0   make_spancat_singlelabelmake_future_entity_rulerSPAN_RULER_DEFAULT_SPANS_KEYmake_span_rulerr   make_edit_tree_lemmatizerr7   make_multilabel_textcatr)   make_span_finderr#   make_nermake_beam_nerr   make_parsermake_beam_parserr3   make_taggerr!   make_nn_labellermake_sentencizer r   r   L/home/ubuntu/.local/lib/python3.10/site-packages/spacy/pipeline/factories.pyregister_factories7   s  		

	

			

	r   nlpnamer   rV   rC   c                 C   s   t ||||dS )Nr   r%   )r   r   r   rV   rC   r   r   r   r     s   r   rB   c                 C   s   t | j|||dS )NrA   )r   vocab)r   r   rB   rC   r   r   r   r        r   )rY   rM   rN   rO   rP   rQ   rR   rS   rT   rU   rW   rX   rY   c                C   sX   |j ddst| j||||||||||dS t| j|||||||||	|
|||||dS )Ninclude_span_makerF)rN   rO   rP   rQ   rR   rS   rV   rC   )rN   rO   rP   rQ   rR   rS   rT   rU   rV   rC   rW   rX   rY   )r   getr   r   r   )r   r   rM   rN   rO   rP   rQ   rR   rS   rT   rU   rV   rC   rW   rX   rY   r   r   r   r     s@   r   rl   c                 C   s   t | j|||||dS )N)rl   rV   rC   )r   r   )r   rM   r   rl   rV   rC   r   r   r   r   0  s   r   c                 C      t | j||||dS N)rY   rC   )r6   r   r   r   rM   rY   rC   r   r   r   r   =  s   r   r|   r}   r~   c                C      t ||dS )Nr|   )r   )r   r   r}   r~   r   r   r   r   G  s   r   r   r   c                C   r   )N)r   )r   )r   r   r   r   r   r   r   r   M  s   r   c                 C   s   t | j||S )N)r:   r   )r   r   rM   r   r   r   r   Q  s   r   r   r   r   c                 C   s    t | j||||d d|||ddS )NTF)
rM   r   r   r   r   r   r   rY   rC   add_negative_labelr1   r   )r   r   r   rM   r   rC   rY   r   r   r   r   r   U     
r   r   r   c                 C   s    t | j||||||ddd |dS )NrL   T)
rM   r   r   r   r   r   r   r   rY   rC   r   )r   r   r   rM   r   r   r   rC   r   r   r   r   n  r   r   rb   rc   rd   re   c           	      C   s,   |rt }nt}t| |d d d||||d|dS )NTFr   )r.   r-   r,   )	r   r   rb   rc   rB   rd   rC   re   r   r   r   r   r     s    
r   c              
   C   s   t | |||||||dS )Nra   )r   )r   r   rb   rc   rB   rd   re   rC   r   r   r   r        
r   r   r   r   c                 C   s   t | |||||||||	|
dS )Nr   )r,   )r   r   r   r   r   r   rb   rc   rB   rV   rC   r   r   r   r     s   r   r   r   r   c              
   C      t | j|||||||dS )N)r   r   rV   r   rC   )r   r   )r   r   rM   r   r   rV   r   rC   r   r   r   r        
r   c                 C   r   r   )r8   r   r   r   r   r   r     s   r   r   c              
   C   s   t | |||||||dS )N)rM   rY   r   rC   r   r}   r   )r*   )r   r   rM   r   rY   r   r}   rC   r   r   r   r     r   r   r   r   r   c              	   C      t | j||||||dS )N)r   r   r   r   rC   r$   r   )r   r   rM   r   r   r   rC   r   r   r   r        	r   r   r   r   c
           
      C   s   t | j|||||||||	d
S )N)r   r   r   r   r   r   r   rC   r   )
r   r   rM   r   r   r   r   r   r   rC   r   r   r   r   "  s   r   r   r   c              
   C   r   )N)r   r   r   r   r   rC   r   r   )r   r   rM   r   r   r   r   rC   r   r   r   r   <  r   r   c                 C   s    t | j|||||||||	|
dS )N)	r   r   r   r   r   r   r   r   rC   r  )r   r   rM   r   r   r   r   r   r   r   rC   r   r   r   r   R  s   r   r   r   c              	   C   r   )N)r   rV   rC   r   r   )r4   r   )r   r   rM   rV   rC   r   r   r   r   r   r   n  r  r   r   r   c                 C   s   t | j|||dS )N)r   )r"   r   )r   r   rM   r   r   r   r   r   r     r   r   r   c              	   C   s   t | j||||||dS )N)rV   r   r   rC   )r    r   )r   rM   r   rV   r   r   rC   r   r   r   r     r  r   c                 C   r   )N)rV   rC   )r(   r   )r   r   rM   rV   rC   r   r   r   r     s   r   )r>   N)vtypingr   r   r   r   r   r   r   r	   	thinc.apir
   thinc.typesr   r   kbr   r   languager   ,pipeline._parser_internals.transition_systemr   pipeline.attributerulerr   pipeline.dep_parserr   r   pipeline.edit_tree_lemmatizerr   r   pipeline.entity_linkerr   r   r   pipeline.entityrulerr   r   pipeline.functionsr   r   pipeline.lemmatizerr   pipeline.morphologizerr   r    pipeline.multitaskr!   r"   pipeline.nerr#   r$   pipeline.sentencizerr&   pipeline.senterr'   r(   pipeline.span_finderr)   r*   pipeline.span_rulerr+   r   r,   r-   r.   pipeline.spancatr/   r0   r1   r2   pipeline.taggerr3   r4   pipeline.textcatr5   r6   pipeline.textcat_multilabelr7   r8   pipeline.tok2vecr9   r:   
tokens.docr;   tokens.spanr<   r   r=   r   r   strboolr   r   intfloatr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   dictr   r   r   r   r   r   r   <module>   s   ( 
   1


	

9


"	
	


	



	

	


	




