o
    }oi-
                     @   s:   d dl mZ d dlZd dlmZ dgZG dd deZdS )    )PathN)TokenizerSpecYouTokenToMeTokenizerc                   @   sv   e Zd ZdddZdd Zdd Zd	d
 Zdd Zdd Zdd Z	e
dd Ze
dd Ze
dd Ze
dd ZdS )r           Fc                 C   sT   t | }tjt|d| _t| j | _| 	g d| _
|| _|| _|| _d S )N)model)<PAD><UNK><BOS><EOS>)r   
expanduseryttmBPEstr	tokenizerlenvocab
vocab_sizetokens_to_idsspecial_tokensbpe_dropoutlegacyr2l)self
model_pathr   r   r    r   m/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/common/tokenizers/youtokentome_tokenizer.py__init__   s   
zYouTokenToMeTokenizer.__init__c                 C      | j j|tjj| j| jdS N)output_typedropout_probreverse)r   encoder   
OutputTypeSUBWORDr   r   r   textr   r   r   text_to_tokens"      z$YouTokenToMeTokenizer.text_to_tokensc                 C   s   |  | |S )N)ids_to_textr   r   tokensr   r   r   tokens_to_text'   s   z$YouTokenToMeTokenizer.tokens_to_textc                 C   r   r   )r   r"   r   r#   IDr   r   r%   r   r   r   text_to_ids*   r(   z!YouTokenToMeTokenizer.text_to_idsc                    s8    fdd|D } j r|d d d } j|gd S )Nc                       g | ]	}| j vr|qS r   r   .0id_r   r   r   
<listcomp>0       z5YouTokenToMeTokenizer.ids_to_text.<locals>.<listcomp>r   )r   r   decoder   idsids_r   r4   r   r)   /   s   z!YouTokenToMeTokenizer.ids_to_textc                    s    fdd|D S )Nc                       g | ]} j |qS r   r   subword_to_id)r2   tokenr4   r   r   r5   6       z7YouTokenToMeTokenizer.tokens_to_ids.<locals>.<listcomp>r   r*   r   r4   r   r   5   s   z#YouTokenToMeTokenizer.tokens_to_idsc                    s0    j r fdd|D }n|} fdd|D S )Nc                    r/   r   r0   r1   r4   r   r   r5   :   r6   z7YouTokenToMeTokenizer.ids_to_tokens.<locals>.<listcomp>c                    r<   r   )r   id_to_subwordr1   r4   r   r   r5   =   r@   )r   r9   r   r4   r   ids_to_tokens8   s   z#YouTokenToMeTokenizer.ids_to_tokensc                 C      | j dS )Nr   r=   r4   r   r   r   pad_id?      zYouTokenToMeTokenizer.pad_idc                 C   rC   )Nr	   r=   r4   r   r   r   bos_idC   rE   zYouTokenToMeTokenizer.bos_idc                 C   rC   )Nr
   r=   r4   r   r   r   eos_idG   rE   zYouTokenToMeTokenizer.eos_idc                 C   rC   )Nr   r=   r4   r   r   r   unk_idK   rE   zYouTokenToMeTokenizer.unk_idN)r   FF)__name__
__module____qualname__r   r'   r,   r.   r)   r   rB   propertyrD   rF   rG   rH   r   r   r   r   r      s     
	


)pathlibr   youtokentomer   "nemo.collections.common.tokenizersr   __all__r   r   r   r   r   <module>   s
   