o
    Ni                     @   s(  d dl Z d dlZejejeZe je d dlm	Z	 d dl
mZ d dlT d dlT d dl Z d dl mZ dZdd Zd	d
 Zdd Zdd Zdd Zdd Zdd ZdedededefddZedkree jdkrse  ed ee jd ee jd ee jd  ee jd! Zee dS dS )"    N)Lexer)yacc)*)exit)kaki_cconjsyll2_cfullvowel_bkaki_akaki_bconjsyll2_bconjsyll2_a	conjsyll1	nukchan_b	nukchan_ayarulefullvowel_avowelc                 C   s~   | j jjjdkr5| d | j jj_| j jjjddkr)t| j jjjdd| j jj_| j jj jd7  _dS | d | j jj_dS )z
    sentence : words
    r      z&&&N)	parsergflags
parseLevelwordssyllabifiedWordOutfindrec_replacephonifiedWordp r!   P/home/ubuntu/.local/lib/python3.10/site-packages/indic_unified_parser/uparser.py
p_sentence   s   r#   c                 C   .   | j jjjrtd| d   | d | d< dS )z
    words : syltoken
    Syll:	r   r   Nr   r   r   DEBUGprintr   r!   r!   r"   p_words_syltoken"   s   r)   c                 C   s6   | j jjjrtd| d   | d | d  | d< dS )z 
    words : words syltoken
    r%      r   r   Nr&   r   r!   r!   r"   p_words_wordsandsyltoken*   s   r+   c                 C   s   | d | d< dS )a  
    syltoken : fullvowel_b
             | fullvowel_a
             | conjsyll2_c
             | conjsyll2_b
             | conjsyll2_a
             | conjsyll1 
             | nukchan_b
             | nukchan_a
             | yarule
             | vowel
    r   r   Nr!   r   r!   r!   r"   
p_syltoken2   s   r,   c                 C   r$   )zV
    syltoken :
             | kaki_c
             | kaki_a
             | kaki_b
    zkaki : r   r   Nr&   r   r!   r!   r"   p_syltoken1A   s   r-   c                 C   s   t d td d S )Nzparse errorr   )r(   r   r   r!   r!   r"   p_errorL   s   r.   c                   C   s4   t d t d t d t d t d t d d S )Nz"UnifiedParser - Usage Instructionsz0Run python3 parser.py wd lsflag wfflag clearflagzwd - word to parse in unicode.z)lsflag - always 0. we are not using this.zOwfflag - 0 for Monophone parsing, 1 for syllable parsing, 2 for Akshara Parsingzsclearflag - 1 for removing the lisp like format of output and to just produce space separated output. Otherwise, 0.)r(   r!   r!   r!   r"   	printHelpQ   s   r/   wdlsflagwfflag	clearflagc                 C   s  t  }t }t }||_d|j_| d} |dvs|dvr%td td ||j_	||j_
|dkr9d|j_
d|j_| }|jjrFtd|  t|}|jjrUtd	|  t||d
kr^d
S t||d
krgd
S |jjrstd|j  t||}|jjrtd|jj  td|jj  |j|jj|d |jjrtd|jj  t|jjddd |j_|jjrtd|jj  t||jj|jj	|j_|jjrtd|jj  |jjrtd|jj  t|jj|j_|jjrtd|jj  |js|jjrtd d
}tt|jjD ]}	|jj|	 dkr|d7 }qd}
t||jjddkrL|dkrJt||jjd}|dkrBd}
n
|dkrId}
nd}
d
}tt|jjD ]}	|jj|	 dkre|d7 }||
krn|	} nqV|jj|jj}}||d  }|d | }|jjrtd| d| d|  t||}|jjrtd|jj  || |j_|jjrtd|jj  t|jj|j_|jjrtd|jj  t|jj|j_|jjrtd|jj  t|jjd
|j_t ||jj|j_t!|jj|j_t"||jj t#| |dkrD|jj$}|%d}t|}d}	d |_&|	|k r?| j&||	 d 7  _&|	d7 }	|	|k s+|j&  |j&S )!NFu     )r   r   )r   r   r*   zInvalid inputr      zWord : zCleared Word : r   z	langId : zSymbols code : zSymbols syllables : )lexerzSyllabified Word : z&#&r   zSyllabified Word out : zSyllabified Word langCorr : zSyllabified Word gemCorr : zSyllabified Word memCorr : z	NOT SOUTHr*      zposi  zprefinal : zprefinal1 : zfinal : z	final0 : " )'GLOBALSr   r   r   r   r'   stripr(   r   LangSpecificCorrectionFlagwriteFormatsyllTagFlagRemoveUnwantedSetlanguageFeatCheckDictionarylangIdConvertToSymbolsr   unicodeWordsyllabifiedWordparser   r   LangSpecificCorrectionCleanseWordisSouthrangelenGetPhoneTypeSchwaSpecificCorrectionSchwaDoubleConsonentGeminateCorrectionMiddleVowel
SyllabilfySplitSyllablesWritetoFiles
outputTextsplitanswer)r0   r1   r2   r3   r   r5   r   wordcountisplitPositiontpestartendtlnr!   r!   r"   	wordparse[   s   

















r`   __main__   r   r   r*   r6   r4   )sysospathdirnameabspath__file__
SCRIPT_DIRappendply.lexr   ply.yaccr   globalshelpersr   tokensr#   r)   r+   r,   r-   r.   r/   strintr`   __name__rK   argvansr(   r!   r!   r!   r"   <module>   s2    
v2