o
    i'                     @   s   d dl mZmZmZmZmZ d dlmZ ddlm	Z	 ddl
mZ ddlmZ ddlmZ ddlmZ d	d	d
ZG dd de	Zdedeeeeef f fddZdS )    )CallableDictListOptionalTuple)Model   )
Lemmatizer)lemmatizer_score)POS)Token)Vocab")   «   »c                       s   e Zd Z	dddeddedee deded	ed
ee	 ddf fddZ
dedee fddZdedee fddZdedee fddZdedee fddZdedee fddZdedee fddZ  ZS )RussianLemmatizer
lemmatizer	pymorphy3Fmode	overwritescorervocabmodelnamer   r   r   returnNc                   s   |dv r(zddl m} W n ty   tdd w t| dd d u r'|dd| _n'|dv rOzddlm} W n ty@   td	d w t| dd d u rO|dd| _t j||||||d
 d S )N>   	pymorphy2pymorphy2_lookupr   )MorphAnalyzerzThe lemmatizer mode 'pymorphy2' requires the pymorphy2 library and dictionaries. Install them with: pip install pymorphy2# for Ukrainian dictionaries:pip install pymorphy2-dicts-uk_morphru)lang>   r   pymorphy3_lookupzThe lemmatizer mode 'pymorphy3' requires the pymorphy3 library and dictionaries. Install them with: pip install pymorphy3# for Ukrainian dictionaries:pip install pymorphy3-dicts-ukr   )r   r   ImportErrorgetattrr   r   super__init__)selfr   r   r   r   r   r   r   	__class__ L/home/ubuntu/.local/lib/python3.10/site-packages/spacy/lang/ru/lemmatizer.pyr&      s8   

zRussianLemmatizer.__init__tokenc                 C   s  |j }|j}|j }|dkrt||gS |dvr| |S | j|}g }|D ](}|j	s/q)t
t|j\}}	||ksL|dv rD|dv sL|dkrQ|dkrQ|| q)t|s[| gS |d u sit|dkrut|v ruttdd |D S |d	v r~g d
}
n|dkrddg}
n|dkrg d}
ng d}
|g }}|D ]+}t
t|j\}	}|
D ]}||v r||v r||  ||  kr nq|| qt|s| gS ttdd |D S )NPUNCT)ADJDETNOUNNUMPRONPROPNVERB)r0   r3   r2   r/      c                 S      g | ]}|j qS r*   normal_form.0analysisr*   r*   r+   
<listcomp>P       z9RussianLemmatizer._pymorphy_lemmatize.<locals>.<listcomp>)r.   r/   r0   r3   )CaseNumberGenderr1   r>   r@   )r>   r?   r@   Person)Aspectr@   Moodr?   TenseVerbFormVoicec                 S   r6   r*   r7   r9   r*   r*   r+   r<   q   r=   )textpos_morphto_dictPUNCT_RULESget_pymorphy_lookup_lemmatizer   parseis_knownoc2udstrtagappendlenlowerr   listdictfromkeys)r'   r,   stringuniv_pos
morphologyanalysesfiltered_analysesr;   analysis_pos_features_to_compareanalysis_morphfeaturer*   r*   r+   _pymorphy_lemmatize7   s^   







	

z%RussianLemmatizer._pymorphy_lemmatizec                 C   sD   |j }| j|}tdd |D }t|dkrtt|gS |gS )Nc                 S   r6   r*   r7   )r:   anr*   r*   r+   r<   y   r=   z@RussianLemmatizer._pymorphy_lookup_lemmatize.<locals>.<listcomp>r5   )rG   r   rN   setrT   nextiter)r'   r,   rY   r\   normal_formsr*   r*   r+   rM   t   s   z,RussianLemmatizer._pymorphy_lookup_lemmatizec                 C   
   |  |S Nrc   r'   r,   r*   r*   r+   pymorphy2_lemmatize~      
z%RussianLemmatizer.pymorphy2_lemmatizec                 C   ri   rj   rM   rl   r*   r*   r+   pymorphy2_lookup_lemmatize   rn   z,RussianLemmatizer.pymorphy2_lookup_lemmatizec                 C   ri   rj   rk   rl   r*   r*   r+   pymorphy3_lemmatize   rn   z%RussianLemmatizer.pymorphy3_lemmatizec                 C   ri   rj   ro   rl   r*   r*   r+   pymorphy3_lookup_lemmatize   rn   z,RussianLemmatizer.pymorphy3_lookup_lemmatize)r   )__name__
__module____qualname__r
   r   r   r   rQ   boolr   r&   r   r   rc   rM   rm   rp   rq   rr   __classcell__r*   r*   r(   r+   r      s6    	(=
r   oc_tagr   c           
      C   s  i ddddddddddd	d
ddddddddddddddddddddddddddddd d!d"d#d$d%d&d&d&d'd'd(d)d*
d+d,d-d.d/d0d1d d2d3d4d5d6dd7id8d9d:d9d8d;d<d=d>d?d@d@dAdBdCdDdDdEdFdGdHdIdJdKidL}dM}t  }t }| dNdOdO}|D ]+}dP}t| D ]\}}	||	v rdQ}|dRkr|	| }q|	| ||< q|s|| qt|dSkr| }|dTv rdU}n|dVkrdW}n|dXkrdY|dZ< t|dSks||fS )[NADJFr.   ADJSADVBADVApror/   COMPCONJCCONJGRNDr4   INFNINTJr0   NPROr2   NUMRr1   NUMBPNCTr-   PRCLPARTPREPADPPRTF)PRTSr4   AnimInan)animinanImpPerf)impfperfInsAccDatGenLocNomVoc)
abltaccsdatvgen1gen2gentloc2loctnomnvoctCmpSup)r~   SuprFemMascNeut)femnmascneutInd)imprindcPlurSing)plursingDigit123)1per2per3perexclinclFutPastPres)futrpastpresBrev)rz   r   ConvInfPartFin)r   r   r   r   r4   ActPass)actvpssvAbbrYes)_POSAnimacyrB   r>   Degreer@   rC   r?   NumFormrA   rD   VariantrE   rF   r   X ,FTr   r   )NamePatrSurnGeoxOrgnr3   AuxtAUXPltmPtanr?   )	rW   re   replacesplitsorteditemsaddrT   pop)
rx   gram_mapposr[   	unmatchedgramsgrammatchcateggmapr*   r*   r+   rP      s   	


6

rP   N)typingr   r   r   r   r   	thinc.apir   pipeliner	   pipeline.lemmatizerr
   symbolsr   tokensr   r   r   rK   r   rQ   rP   r*   r*   r*   r+   <module>   s    
&}