o
    'Ni#                     @   sn  d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d d	lmZ d d
lmZ dZdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Z d*d+ Z!d,d- Z"d.d/ Z#d0d1 Z$d2d3 Z%d4d5 Z&d6d7 Z'd8d9 Z(d:d; Z)d<d= Z*d>d? Z+e,d@kre-  e+  dS dS )A    N)loader)indic_tokenize)indic_detokenize)indic_normalize)unsupervised_morph)sentence_tokenize)syllabifier)unicode_transliterate)script_unifierzutf-8c                 C   s&   | j D ]}| jt|| j qd S N)infileoutfilewriter   trivial_detokenizelangargsline r   J/home/ubuntu/.local/lib/python3.10/site-packages/indicnlp/cli/cliparser.pyrun_detokenize   s   
r   c              	   C   s,   | j D ]}| jdt|| j qd S )N )r   r   r   joinr   trivial_tokenizer   r   r   r   r   run_tokenize   s
   

r   c                 C   sB   d dd | jD }t|| j}|D ]
}| j|d  qd S )Nr   c                 S   s    g | ]}| d d ddqS )
 )replace).0lr   r   r   
<listcomp>   s     z&run_sentence_split.<locals>.<listcomp>r   )r   r   r   sentence_splitr   r   r   )r   textoutlinesr   r   r   r   run_sentence_split   s
   r%   c                 C   sH   d}d}t  }|j| j||d}| jD ]}||}| j| qd S )NF
do_nothing)remove_nuktasnasals_mode)r   IndicNormalizerFactoryget_normalizerr   r   	normalizer   r   )r   r'   normalize_nasalsfactory
normalizerr   normalized_liner   r   r   run_normalize   s   

r0   c                 C   sL   d}t | j|}| jD ]}|| d}| jd	|d  qd S )NFr   r   )
r   UnsupervisedMorphAnalyzerr   r   morph_analyze_documentstripsplitr   r   r   )r   
add_markeranalyzerr   morph_tokensr   r   r   	run_morph0   s   
r8   c                    sB    j D ]}d fdd| dD } j|d  qd S )Nr   c                    s    g | ]}d  t| jqS )r   )r   r   orthographic_syllabifyr   )r   wr   r   r   r!   ;   s    z!run_syllabify.<locals>.<listcomp>r   )r   r   r3   r4   r   r   )r   r   new_liner   r;   r   run_syllabify8   s   

r=   c                 C   sZ   d}d}d}| j D ]}|d7 }|t|dd7 }|t|7 }q	td||| d S )Nr      r   z{} {} {})r   lenr3   r4   printformat)r   nlnwncr   r   r   r   run_wc@   s   
rE   c                 C   ,   | j D ]}tj|| j}| j| qd S r   )r   r	   ItransTransliterator	to_itransr   r   r   r   r   transliterated_liner   r   r   run_indic2romanO      
rK   c                 C   rF   r   )r   r	   rG   from_itransr   r   r   rI   r   r   r   run_roman2indicU   rL   rN   c                 C   s   d }| j dkrtjd| jd}n| j dkrtjd| jd}n| j dkr*tj| jd}|d us0J | jD ]}||| j}| j	
| q3d S )N
aggressiveto_anusvaara_relaxed)r(   common_langbasicr&   naive)rQ   )moder
   AggressiveScriptUnifierrQ   BasicScriptUnifierNaiveScriptUnifierr   	transformr   r   r   )r   unifierr   rJ   r   r   r   run_script_unify[   s   



rZ   c                 C   s0   | j D ]}tj|| j| j}| j| qd S r   )r   r	   UnicodeIndicTransliteratortransliteratesrclangtgtlangr   r   rI   r   r   r   run_script_converto   s   

r_   c                 C   sT   | j dtjdtddtjdd | j dtjdtddtjd	d | j d
ddd d S )Nr   rencoding?Input File pathtypenargsdefaulthelpr   r:   Output File pathz-lz--langLanguageri   add_argumentargparseFileTypeDEFAULT_ENCODINGsysstdinstdouttask_parserr   r   r   add_common_monolingual_argsu   s   
rw   c                 C   sd   | j dtjdtddtjdd | j dtjdtddtjd	d | j d
ddd | j dddd d S )Nr   r`   ra   rc   rd   re   r   r:   rj   z-sz	--srclangzSource Languagerl   z-tz	--tgtlangzTarget Languagerm   ru   r   r   r   add_common_bilingual_args   s$   
rx   c                 C   &   | j ddd}t| |jtd d S )Ntokenizeztokenizer helprl   func)
add_parserrw   set_defaultsr   
subparsersrv   r   r   r   add_tokenize_parser   
   r   c                 C   ry   )N
detokenizezde-tokenizer helprl   r{   )r}   rw   r~   r   r   r   r   r   add_detokenize_parser   r   r   c                 C   ry   )Nr"   zsentence split helprl   r{   )r}   rw   r~   r%   r   r   r   r   add_sentence_split_parser      r   c                 C   ry   )Nr+   znormalizer helprl   r{   )r}   rw   r~   r0   r   r   r   r   add_normalize_parser   r   r   c                 C   ry   )Nmorphz
morph helprl   r{   )r}   rw   r~   r8   r   r   r   r   add_morph_parser   r   r   c                 C   ry   )N	syllabifyzsyllabify helprl   r{   )r}   rw   r~   r=   r   r   r   r   add_syllabify_parser   r   r   c                 C   s>   | j ddd}|jdtjdtddtjdd	 |jtd
 d S )Nwczwc helprl   r   r`   ra   rc   rd   re   r{   )	r}   rn   ro   rp   rq   rr   rs   r~   rE   r   r   r   r   add_wc_parser   s   r   c                 C   ry   )Nindic2romanzindic2roman helprl   r{   r}   rw   r~   rK   r   r   r   r   add_indic2roman_parser   r   r   c                 C   ry   )Nroman2indiczroman2indic helprl   r{   r   r   r   r   r   add_roman2indic_parser   r   r   c                 C   sP   | j ddd}t| |jdddg ddd	 |jd
dddd |jtd d S )Nscript_unifyzscript_unify helprl   z-mz--moderR   )rS   rR   rO   zScript unification mode)rh   choicesri   z-cz--common_langhiz6Common language in which all languages are represented)rh   ri   r{   )r}   rw   rn   r~   rZ   r   r   r   r   add_script_unify_parser   s   r   c                 C   ry   )Nscript_convertzscript convert helprl   r{   )r}   rx   r~   r_   r   r   r   r   add_script_convert_parser   r   r   c                  C   sv   t jdd} | jddd}t| t| t| t| t| t| t	| t
| t| t| t| | S )Nindicnlp)progz1Invoke each operation with one of the subcommands
subcommand)ri   dest)ro   ArgumentParseradd_subparsersr   r   r   r   r   r   r   r   r   r   r   )parserr   r   r   r   
get_parser   s   r   c                  C   s   t  } |  }|| d S r   )r   
parse_argsr|   )r   r   r   r   r   main  s   r   __main__).ro   rr   r   r   indicnlp.tokenizer   r   indicnlp.normalizer   indicnlp.morphr   r   indicnlp.syllabler   indicnlp.transliterater	   r
   rq   r   r   r%   r0   r8   r=   rE   rK   rN   rZ   r_   rw   rx   r   r   r   r   r   r   r   r   r   r   r   r   r   __name__loadr   r   r   r   <module>   sT    
