o
    Qi&                     @   sN   d dl mZmZ ddlmZmZ G dd dZdd Zdd
dZdddZ	dS )    )compose	decompose   )	conjugateconjugate_chatc                   @   s0   e Zd ZdddZdd ZdddZd	d
 ZdS )
LemmatizerNc                 C   s,   || _ || _|   |r| j| d S d S N)_stems_endings_initialize_predefinedupdate)selfstemsendings
predefined r   Q/home/ubuntu/.local/lib/python3.10/site-packages/soynlp/lemmatizer/_lemmatizer.py__init__   s   zLemmatizer.__init__c                 C   s   ddd| _ d S )N)u   붇다u   불다)u	   그렇다)u   불어u   그래)r   )r   r   r   r   r      s   zLemmatizer._initializeFc           	      C   s   t  }tdt|d D ]6}|d | ||d  }}t||| jD ]\}}|| jv rA|r5|||f q"|| jv rA|||f q"q|S Nr   )setrangelenlemma_candidater   r	   addr
   )	r   wordcheck_only_stem
candidatesilrstemendingr   r   r   	lemmatize   s   

zLemmatizer.lemmatizec                 C   sP   t  }tdt|d D ]}|d | }||d  }|| ||| j q|S r   )r   r   r   r   r   r   )r   r   r   r   r   r    r   r   r   r      s   zLemmatizer.candidatesr   )F)__name__
__module____qualname__r   r   r#   r   r   r   r   r   r      s
    

r   c                 C   s   t d| || d S )Nz{}: {} + {})printformat)messager   r    r   r   r   debug_message'   s   r*   NFc                    s    fdd}dd }t | ||| t| d }|sA||d rA| d d t|d |d d	 }|r7td
|d  t ||||  S )Nc                         | |f d S r   r   r!   r"   r   r   r   	add_lemma+      z'lemma_candidate_chat.<locals>.add_lemmac                 S   s   | t dv S )Nu   ㄷㅂㅅㅇㅋㅎ)r   )cr   r   r   character_is_emoticon.   s   z3lemma_candidate_chat.<locals>.character_is_emoticon   r   r    u:   마지막 종성이 이모티콘으로 의심되는 경우z())r   r   r   r*   r   )r   r    r   debugr/   r2   l_lastl_r   r.   r   lemma_candidate_chat*   s    r9   c                    s   fdd}| |fh | | }t | d }t|d |d d}| d d }|r-t |d nd}	|r;t|	d |	d dnd}
|dd  }|d d	krh|	d d
krh|t|d |d d }||| |rhtd|| |d d	kr|
dksv|
dkr|t|d |d d d }td
|	d |	d | }||| |rtd|| |d dkr|t|d |d d }|
dks|
dkrtd
|
dkrdnd|	d r|	d nd| }n|r|d dkrtd
d|	d r|	d nd| }n|}||| |rtd|| |d dks|d d	ks|d dks|d dks|d dkrJdD ].}|d |kr&q|t|d |d | }|d | }||| |rHtd| || q|d dkry| d dkry|	d d
kry|t|d |d d }||| |rytd || |d!kr|d" }td
|d |d | }||| |rtd#|| |d d$kr|t|d dd }td
d|d | }||| |rtd%|| |d d&kr|t|d d'd }td
d|d | }||| |rtd(|| |d dks|d dkr |t|d d)d }td
|d |d | }||| |r td*|| |d dkrP|	d d
krP|	d dks<|	d dkrP| d+ }|}||| |rPtd,|| |d d-krz|d d.krz|d/ }td
d|d | }||| |rztd0|| |d dks|d dks|d d	ks|d dks|d dkr0|d dks|d dkr|t|d |d d- }|d dkr|n|d | }||| |rtd1|| |d d.ks|d d2kr0t| dkr| d3 d4kr|d d	kr|d5 }n|t|d |d d2kr
dndd- }td
|d d2krdnd|d | }||| |r0td6|| |d dksE|d dksE|d dkr|d d7kr|d d
krZ|d d7ksa|d d
ks|t|d d8d }td
d|d | }||| |rtd9|| |r| |f|v r|| |f D ]} | |rtd:|| qt } D ](\}}|sqt |d d d-krqt||}||v r|||f q|S );Nc                    r+   r   r,   r-   r.   r   r   r/   @   r0   z"lemma_candidate.<locals>.add_lemmar3   r   r   r5   ) r:   r:   r4   u   ㄹu   ㅇu   ㄷu   ㄷ 불규칙 활용u   러u   라u   르u   르 불규칙 활용u   ㅂu   워u   와u   ㅏu   ㅓu   려u   ㅜu   ㅂ 불규칙 활용u   ㄴu   ㅁu   ㅆu
    ㄹㅂㅎu,   어미의 첫글자가 종성일 경우 (%s)u   벗u   ㅅu   ㅅ 불규칙 활용u   퍼u   푸u   우 불규칙 활용 (퍼)u   ㅝu   우 불규칙 활용u   ㅘu   ㅗu   오 불규칙 활용u   ㅡu!   ㅡ 탈락 불규칙 활용 (꺼)u   으u$   ㅡ 탈락 불규칙 활용 (모으)u   ㅎu   ㅐu   하u   여 불규칙 활용u   ㅎ 탈락 불규칙 활용u   ㅔu   그u   렇u   ㅎ 축약 불규칙 활용u   ㅕu   ㅣu   이었 -> 였 규칙 활용
Predefined)r   r   r*   r   r   r   r   )r   r    r   r6   r/   r   r7   l_last_l_frontr_firstr_first_r_endl_stemr_canonjongsungr!   candidates_eomisurfacesr   r.   r   r   ?   s   


."
F
*





8

F
*
&&
**



r   )NF)
soynlp.hangler   r   _conjugationr   r   r   r*   r9   r   r   r   r   r   <module>   s   !
