o
    2wib                     @   s   d Z ddlZddlZddlZddlZddlmZ ddlmZ zej	
d W n ey4   ed Y nw ddlmZmZmZmZmZmZmZmZmZmZmZmZ ddlmZmZmZmZ dd	lmZm Z m!Z!m"Z"m#Z#m$Z$ dd
l%m&Z& ddl'm(Z( G dd de)Z*e+dkre* Z,e,d dS dS )z"
https://github.com/kyubyong/g2pK
    N)h2j)cmudictzcorpora/cmudict.zipr   )jyeoyeconsonant_uijosa_uivowel_uijamorieulgiyeok
rieulbieub
verb_nieunbalb
palatalizemodifying_rieul)link1link2link3link4)annotatecomposegroupglossparse_tableget_rule_id2text)convert_eng)convert_numc                   @   s0   e Zd Zdd Zdd ZdddZdd	d
ZdS )G2pc                 C   sH   |   | _t | _t | _t | _t	j
t	j
t	j
td| _d S )Nz
idioms.txt)	get_mecabmecabr   tabler   dictcmur   	rule2textospathjoindirnameabspath__file__idioms_pathself r,   F/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/g2pk/g2pk.py__init__   s
   

$zG2p.__init__c              
   C   s,   zt  W S  ty } ztdd }~ww )NzKIf you want to install mecab, The command is... pip install python-mecab-ko)r   MeCab	Exception)r+   er,   r,   r-   r   $   s   
zG2p.get_mecabFc           	      C   sf   d}|}t | jdddD ]}|dd  }d|v r)|d\}}t|||}qt|||| |S )ua  Process each line in `idioms.txt`
        Each line is delimited by "===",
        and the left string is replaced by the right one.
        inp: input string.
        descriptive: not used.
        verbose: boolean.

        >>> idioms("지금 mp3 파일을 다운받고 있어요")
        지금 엠피쓰리 파일을 다운받고 있어요
        zfrom idioms.txtrutf8)encoding#r   z===)openr)   splitstripresubr   )	r+   stringdescriptiveverboseruleoutlinestr1str2r,   r,   r-   idioms,   s   z
G2p.idiomsTc                    s    |||}t| j}t| j}t|}t|}ttt	t
ttttttttfD ]}||||}q)tdd|} jD ]*\}}	}
|}t||	|}t|
dkr]d fdd|
D }nd}t|||| q<ttttfD ]}||||}qm|r|t|}|rt|}|S )u_  Main function
        string: input string
        descriptive: boolean.
        verbose: boolean
        group_vowels: boolean. If True, the vowels of the identical sound are normalized.
        to_syl: boolean. If True, hangul letters or jamo are assembled to form syllables.

        For example, given an input string "나의 친구가 mp3 file 3개를 다운받고 있다",
        STEP 1. idioms
        -> 나의 친구가 엠피쓰리 file 3개를 다운받고 있다

        STEP 2. English to Hangul
        -> 나의 친구가 엠피쓰리 파일 3개를 다운받고 있다

        STEP 3. annotate
        -> 나의/J 친구가 엠피쓰리 파일 3개/B를 다운받고 있다

        STEP 4. Spell out arabic numbers
        -> 나의/J 친구가 엠피쓰리 파일 세개/B를 다운받고 있다

        STEP 5. decompose
        -> 나의/J 친구가 엠피쓰리 파일 세개/B를 다운받고 있다

        STEP 6-9. Hangul
        -> 나의 친구가 엠피쓰리 파일 세개를 다운받꼬 읻따
        z/[PJEB] r   
c                 3   s    | ]
} j |d V  qdS )rD   N)r"   get).0rule_idr*   r,   r-   	<genexpr>z   s    zG2p.__call__.<locals>.<genexpr>)rC   r   r!   r   r   r   r   r   r   r   r   r   r	   r
   r   r   r   r   r   r9   r:   r   lenr%   r   r   r   r   r   r   r   )r+   r;   r<   r=   group_vowelsto_sylinpfuncrA   rB   rule_ids_inpr>   r,   r*   r-   __call__C   s2   
zG2p.__call__N)FF)FFFT)__name__
__module____qualname__r.   r   rC   rQ   r,   r,   r,   r-   r      s
    	
r   __main__u5   나의 친구가 mp3 file 3개를 다운받고 있다)-__doc__r#   r9   nltkr   r	   r   nltk.corpusr   datafindLookupErrordownloadg2pk.specialr   r   r   r   r   r
   r   r   r   r   r   g2pk.regularr   r   r   r   
g2pk.utilsr   r   r   r   r   r   g2pk.englishr   g2pk.numeralsr   objectr   rR   g2pr,   r,   r,   r-   <module>   s*   8 q