o
    }oiE                     @   s   d dl mZ G dd dZdS )    )Listc                   @   sP   e Zd ZdZdefddZdee defddZd	efd
dZd	efddZ	dS )MosesProcessorzB
    Tokenizer, Detokenizer and Normalizer utilities in Moses
    lang_idc                 C   s<   ddl m}m}m} ||d| _||d| _||d| _d S )Nr   )MosesDetokenizerMosesPunctNormalizerMosesTokenizer)lang)
sacremosesr   r   r   moses_tokenizermoses_detokenizer
normalizer)selfr   r   r   r    r   g/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/common/tokenizers/moses_tokenizers.py__init__   s   zMosesProcessor.__init__tokensreturnc                 C      | j |S )z
        Detokenizes a list of tokens
        Args:
            tokens: list of strings as tokens
        Returns:
            detokenized string
        )r   
detokenize)r   r   r   r   r   r      s   zMosesProcessor.detokenizetextc                 C   s   | j j|dddS )z>
        Tokenizes text using Moses -> Sentencepiece.
        FT)escape
return_str)r
   tokenizer   r   r   r   r   r   (   s   zMosesProcessor.tokenizec                 C   r   )N)r   	normalizer   r   r   r   r   .   s   zMosesProcessor.normalizeN)
__name__
__module____qualname____doc__strr   r   r   r   r   r   r   r   r   r      s    
r   N)typingr   r   r   r   r   r   <module>   s   