o
    'N ib  ã                   @   sŠ   d Z ddlZddlZddlZddlmZ dZe de d ¡ZdZ	e de	 d	 ¡Z
d
Ze de d	 ¡Ze d¡Zdd„ Zddd„ZdS )z$
De-tokenizer for Indian languages.
é    N)ÚIndicNlpExceptionz!%)\]},.:;>?\u0964\u0965z[ ]([z])z#$(\[{<@z([z])[ ]z-/\\z([0-9]+ [,.:/] )+[0-9]+c                 C   s$  | }d}d}t  |¡D ]$}| ¡ }| ¡ }||kr/||||…  }||||…  dd¡ }|}q|||d…  }|}t d|¡}t d|¡}t d|¡}d}|D ]?}d}	g }
|D ]!}||krt|	d dkrj|
 	d¡ n|
 	d	¡ |	d
7 }	qX|
 	|¡ qXd 
|
¡ d|¡ d|¡ d|¡ d	|¡}qP|S )a‡  detokenize string for Indian language scripts using Brahmi-derived scripts

    A trivial detokenizer which:

        - decides whether punctuation attaches to left/right or both
        - handles number sequences
        - handles quotes smartly (deciding left or right attachment)

    Args:
        text (str): tokenized text to process 

    Returns:
        str: detokenized string
    Ú r   ú Nz\1z'"`é   z@RAz@LAé   z@RA z @LA)Úpat_num_seqÚfinditerÚstartÚendÚreplaceÚpat_lraÚsubÚpat_laÚpat_raÚappendÚjoin)ÚtextÚsÚnew_sÚprevÚmr	   r
   Ú
alt_attachÚpuncÚcntÚout_strÚc© r   úV/home/ubuntu/.local/lib/python3.10/site-packages/indicnlp/tokenize/indic_detokenize.pyÚtrivial_detokenize_indic'   s>   €

ÿr   Úhic                 C   s   t | ƒS )aÅ  detokenize string for languages of the Indian subcontinent 

    A trivial detokenizer which:

        - decides whether punctuation attaches to left/right or both
        - handles number sequences
        - handles quotes smartly (deciding left or right attachment)

    Args:
        text (str): tokenized text to process 

    Returns:
        str: detokenized string

    Raises:
        IndicNlpException: If language is not supported        
    )r   )r   Úlangr   r   r   Útrivial_detokenizee   s   r!   )r   )Ú__doc__ÚstringÚreÚsysÚindicnlp.commonr   Úleft_attachÚcompiler   Úright_attachr   Ú	lr_attachr   r   r   r!   r   r   r   r   Ú<module>   s   
>