o
    Wεi                     @   s4  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
 zejd W n ey>   ed Y nw d dlmZmZmZmZmZmZmZmZmZmZmZmZ d dlmZmZmZmZ d dl m!Z!m"Z"m#Z#m$Z$m%Z%m&Z& d d	l'm(Z( d d
l)m*Z* G dd de+Z,e-dkre, Z.e.d dS dS )    N)h2j)cmudictzcorpora/cmudict.zipr   )jyeoyeconsonant_uijosa_uivowel_uijamorieulgiyeok
rieulbieub
verb_nieunbalb
palatalizemodifying_rieul)link1link2link3link4)annotatecomposegroupglossparse_tableget_rule_id2text)convert_eng)convert_numc                   @   s@   e Zd Zdd Zdd Zdd Zdd Zdd
dZdddZdS )G2pc                 C   sP   |    |  | _t | _t | _t | _	t
jt
jt
jtd| _d S )Nz
idioms.txt)check_mecab	get_mecabmecabr   tabler   dictcmur   	rule2textospathjoindirnameabspath__file__idioms_pathself r-   ?/home/ubuntu/.local/lib/python3.10/site-packages/g2pkk/g2pkk.py__init__   s   

$zG2p.__init__c                 C   s   t ||gd}|S )N)fromlist)
__import__)r,   module_nametmpr-   r-   r.   load_module_func#   s   zG2p.load_module_funcc                 C   s   t  dkr#tjd}|d u }|r!td td}|  d S d S tjd}|d u }|rDtd tt	j
ddd	d
g}|  d S d S )NWindowseunjeonz*you have to install eunjeon. install it...zpip install eunjeonr   z2you have to install python-mecab-ko. install it...z-mpipinstallzpython-mecab-ko)platformsystem	importlibutil	find_specprint
subprocessPopenwaitsys
executable)r,   	spam_spec	non_foundpr-   r-   r.   r   '   s   
zG2p.check_mecabc              
   C   s   t  dkr!z
| d}| W S  ty  } ztdd }~ww z
| d}| W S  tyB } ztd W Y d }~d S d }~ww )Nr5   r6   z2you have to install eunjeon. "pip install eunjeon"r   zByou have to install python-mecab-ko. "pip install python-mecab-ko")r9   r:   r4   Mecab	Exceptionr>   MeCab)r,   mer-   r-   r.   r   8   s   



zG2p.get_mecabFc           
      C   s   d}|}t | jddd0}|D ]}|dd  }d|v r,|d\}}	t||	|}qt|||| W d   |S 1 s?w   Y  |S )	ua  Process each line in `idioms.txt`
        Each line is delimited by "===",
        and the left string is replaced by the right one.
        inp: input string.
        descriptive: not used.
        verbose: boolean.

        >>> idioms("지금 mp3 파일을 다운받고 있어요")
        지금 엠피쓰리 파일을 다운받고 있어요
        zfrom idioms.txtrutf8)encoding#r   z===N)openr*   splitstripresubr   )
r,   stringdescriptiveverboseruleoutflinestr1str2r-   r-   r.   idiomsG   s   
z
G2p.idiomsTc                    s    |||}t| j}t| j}t|}t|}ttt	t
ttttttttfD ]}||||}q)tdd|} jD ]*\}}	}
|}t||	|}t|
dkr]d fdd|
D }nd}t|||| q<ttttfD ]}||||}qm|r|t|}|rt|}|S )u_  Main function
        string: input string
        descriptive: boolean.
        verbose: boolean
        group_vowels: boolean. If True, the vowels of the identical sound are normalized.
        to_syl: boolean. If True, hangul letters or jamo are assembled to form syllables.

        For example, given an input string "나의 친구가 mp3 file 3개를 다운받고 있다",
        STEP 1. idioms
        -> 나의 친구가 엠피쓰리 file 3개를 다운받고 있다

        STEP 2. English to Hangul
        -> 나의 친구가 엠피쓰리 파일 3개를 다운받고 있다

        STEP 3. annotate
        -> 나의/J 친구가 엠피쓰리 파일 3개/B를 다운받고 있다

        STEP 4. Spell out arabic numbers
        -> 나의/J 친구가 엠피쓰리 파일 세개/B를 다운받고 있다

        STEP 5. decompose
        -> 나의/J 친구가 엠피쓰리 파일 세개/B를 다운받고 있다

        STEP 6-9. Hangul
        -> 나의 친구가 엠피쓰리 파일 세개를 다운받꼬 읻따
        z/[PJEB] r   
c                 3   s    | ]
} j |d V  qdS )r_   N)r#   get).0rule_idr+   r-   r.   	<genexpr>   s    zG2p.__call__.<locals>.<genexpr>)r^   r   r"   r   r   r   r   r   r   r   r   r   r	   r
   r   r   r   r   r   rS   rT   r    lenr&   r   r   r   r   r   r   r   )r,   rU   rV   rW   group_vowelsto_sylinpfuncr\   r]   rule_ids_inprX   r-   r+   r.   __call___   s2   
zG2p.__call__N)FF)FFFT)	__name__
__module____qualname__r/   r4   r   r   r^   rl   r-   r-   r-   r.   r      s    

r   __main__u5   나의 친구가 mp3 file 3개를 다운받고 있다)/r$   rS   r9   rB   r;   r?   nltkr	   r   nltk.corpusr   datafindLookupErrordownloadg2pkk.specialr   r   r   r   r   r
   r   r   r   r   r   g2pkk.regularr   r   r   r   g2pkk.utilsr   r   r   r   r   r   g2pkk.englishr   g2pkk.numeralsr   objectr   rm   g2pr-   r-   r-   r.   <module>   s*   (8  