o
    }oi	                     @   s   d dl mZ G dd dZdS )    )Listc                   @   sP   e Zd ZdZdefddZdee defddZd	efd
dZd	efddZ	dS )IndicProcessorzj
    Tokenizer, Detokenizer and Normalizer utilities in Indic Languages.
    Currently supports: 'hi'
    lang_idc                 C   sH   |dkrt ddlm}m}m} ||d| _||d| _||d| _d S )Nhir   )MosesDetokenizerMosesPunctNormalizerMosesTokenizer)lang)NotImplementedError
sacremosesr   r   r   moses_tokenizermoses_detokenizer
normalizer)selfr   r   r   r    r   g/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/common/tokenizers/indic_tokenizers.py__init__   s   zIndicProcessor.__init__tokensreturnc                 C   s   | j |S )z
        Detokenizes a list of tokens
        Args:
            tokens: list of strings as tokens
        Returns:
            detokenized string
        )r   
detokenize)r   r   r   r   r   r   !   s   zIndicProcessor.detokenizetextc                 C      |S Nr   r   r   r   r   r   tokenize+      zIndicProcessor.tokenizec                 C   r   r   r   r   r   r   r   	normalize.   r   zIndicProcessor.normalizeN)
__name__
__module____qualname____doc__strr   r   r   r   r   r   r   r   r   r      s    	
r   N)typingr   r   r   r   r   r   <module>   s   