o
    i                     @   sT   d dl Z d dlmZ d dlmZmZmZ d dlmZ d dl	m
Z
 G dd de
ZdS )    N)Path)IterableListUnion)check_argument_types)AbsTokenizerc                   @   sl   e Zd Z			ddeeeee f dedefddZd	d
 Z	dede
e fddZdee defddZdS )CharTokenizerN<space>Fnon_linguistic_symbolsspace_symbolremove_non_linguistic_symbolsc                 C   s   t  sJ || _|d u rt | _nIt|ttfrUt|}z#|jddd}tdd |D | _W d    n1 s9w   Y  W n tyT   t	
| d t | _Y nw t|| _|| _d S )Nrzutf-8)encodingc                 s   s    | ]}|  V  qd S )N)rstrip).0line r   O/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/text/char_tokenizer.py	<genexpr>   s    z)CharTokenizer.__init__.<locals>.<genexpr>z doesn't exist.)r   r   setr
   
isinstancer   stropenFileNotFoundErrorwarningswarnr   )selfr
   r   r   fr   r   r   __init__   s"   



zCharTokenizer.__init__c                 C   s   | j j d| j d| j dS )Nz(space_symbol="z"non_linguistic_symbols="z"))	__class____name__r   r
   r   r   r   r   __repr__!   s   
zCharTokenizer.__repr__r   returnc                 C   s   g }t |dkrG| jD ]}||r*| js ||d t |  |t |d  } nq|d }|dkr6| j}|| |dd  }t |dks|S )Nr       )lenr
   
startswithr   appendr   )r   r   tokenswtr   r   r   text2tokens)   s    


zCharTokenizer.text2tokensr)   c                    s    fdd|D }d |S )Nc                    s   g | ]}| j kr|nd qS )r$   )r   )r   r+   r!   r   r   
<listcomp>;   s    z-CharTokenizer.tokens2text.<locals>.<listcomp> )join)r   r)   r   r!   r   tokens2text:   s   
zCharTokenizer.tokens2text)Nr	   F)r    
__module____qualname__r   r   r   r   boolr   r"   r   r,   r0   r   r   r   r   r   
   s    
r   )r   pathlibr   typingr   r   r   	typeguardr   espnet2.text.abs_tokenizerr   r   r   r   r   r   <module>   s    