o
     ¢iÈ  ã                   @   s<   d dl mZmZ ddlmZ ddlmZ G dd„ deƒZdS )é    )ÚListÚTupleé   )Ú
Lemmatizer)ÚTokenc                       sT   e Zd ZdZededeee ee f f‡ fdd„ƒZde	dee fdd„Z
‡  ZS )	ÚFrenchLemmatizeraN  
    French language lemmatizer applies the default rule based lemmatization
    procedure with some modifications for better French language support.

    The parts of speech 'ADV', 'PRON', 'DET', 'ADP' and 'AUX' are added to use
    the rule-based lemmatization. As a last resort, the lemmatizer checks in
    the lookup table.
    ÚmodeÚreturnc                    s$   |dkrg d¢}|g fS t ƒ  |¡S )NÚrule)Úlemma_lookupÚlemma_rulesÚ	lemma_excÚlemma_index)ÚsuperÚget_lookups_config)Úclsr   Úrequired©Ú	__class__© úL/home/ubuntu/.local/lib/python3.10/site-packages/spacy/lang/fr/lemmatizer.pyr      s   z#FrenchLemmatizer.get_lookups_configÚtokenc                 C   s¦  |j |jf}|| jv r| j| S |j}|j ¡ }|dv r!| ¡ gS d| jvs*|dvr/|  |¡S | j di ¡}| j di ¡}| j di ¡}| j di ¡}| 	|i ¡}	| 	|i ¡}
| 	|g ¡}| ¡ }g }||	v rs| 
|¡ || j|< |S | |
 	|g ¡¡ g }|s°|D ]-\}}| |¡r¯|d t|ƒt|ƒ … | }|sœq‚||	v s¤| ¡ sª| 
|¡ q‚| 
|¡ q‚|s·| |¡ |sÅ| 
| 	||g¡d ¡ tt |¡ƒ}|| j|< |S )N)Ú ÚeolÚspacer   )ÚnounÚverbÚadjÚadpÚadvÚauxÚcconjÚdetÚpronÚpunctÚsconjr   r   r   r   )ÚorthÚposÚcacheÚtextÚpos_ÚlowerÚlookupsÚlookup_lemmatizeÚ	get_tableÚgetÚappendÚextendÚendswithÚlenÚisalphaÚlistÚdictÚfromkeys)Úselfr   Ú	cache_keyÚstringÚuniv_posÚindex_tableÚ	exc_tableÚrules_tableÚlookup_tableÚindexÚ
exceptionsÚrulesÚformsÚ	oov_formsÚoldÚnewÚformr   r   r   Úrule_lemmatize   sR   








€

zFrenchLemmatizer.rule_lemmatize)Ú__name__Ú
__module__Ú__qualname__Ú__doc__ÚclassmethodÚstrr   r   r   r   rH   Ú__classcell__r   r   r   r   r      s
    	(r   N)Útypingr   r   Úpipeliner   Útokensr   r   r   r   r   r   Ú<module>   s    