o
    ¡¿¯iT  ã                   @   s    d Z ddlZddlmZ ddlmZ e d¡Zdd„ d	D ƒZd
d„ Zdd„ Z	dd„ Z
dd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zd d!„ ZdS )"a¢  This file is derived from https://github.com/keithito/tacotron.

Cleaners are transformations that run over the input text at both training and eval time.

Cleaners can be selected by passing a comma-delimited list of cleaner names as the "cleaners"
hyperparameter. Some cleaners are English-specific. You'll typically want to use:
1. "english_cleaners" for English text
2. "transliteration_cleaners" for non-English text that can be transliterated to ASCII using
   the Unidecode library (https://pypi.python.org/pypi/Unidecode)
3. "basic_cleaners" if you do not want to transliterate (in this case, you should also update
   the symbols in symbols.py to match your data).
é    N©Ú	unidecodeé   ©Únormalize_numbersz\s+c                 C   s*   g | ]}t  d |d  t j¡|d f‘qS )z\b%s\.r   r   )ÚreÚcompileÚ
IGNORECASE)Ú.0Úx© r   úM/home/ubuntu/.local/lib/python3.10/site-packages/tacotron_cleaner/cleaners.pyÚ
<listcomp>   s   * r   ))ÚmrsÚmisess)ÚmrÚmister)ÚdrÚdoctor)ÚstÚsaint)ÚcoÚcompany)ÚjrÚjunior)ÚmajÚmajor)ÚgenÚgeneral)ÚdrsÚdoctors)ÚrevÚreverend)ÚltÚ
lieutenant)ÚhonÚ	honorable)ÚsgtÚsergeant)ÚcaptÚcaptain)ÚesqÚesquire)ÚltdÚlimited)ÚcolÚcolonel)ÚftÚfortc                 C   s    t D ]\}}t ||| ¡} q| S ©N)Ú_abbreviationsr   Úsub)ÚtextÚregexÚreplacementr   r   r   Úexpand_abbreviations.   s   r9   c                 C   ó   t | ƒS r3   r   ©r6   r   r   r   Úexpand_numbers4   ó   r<   c                 C   ó   |   ¡ S r3   )Úlowerr;   r   r   r   Ú	lowercase8   r=   r@   c                 C   s   t  td| ¡S )Nú )r   r5   Ú_whitespace_rer;   r   r   r   Úcollapse_whitespace<   s   rC   c                 C   r:   r3   r   r;   r   r   r   Úconvert_to_ascii@   r=   rD   c                 C   s   t | ƒ} t| ƒ} | S )zPBasic pipeline that lowercases and collapses whitespace without transliteration.)r@   rC   r;   r   r   r   Úbasic_cleanersD   s   rE   c                 C   s   t | ƒ} t| ƒ} t| ƒ} | S )z;Pipeline for non-English text that transliterates to ASCII.)rD   r@   rC   r;   r   r   r   Útransliteration_cleanersK   s   rF   c                 C   s,   t | ƒ} t| ƒ} t| ƒ} t| ƒ} t| ƒ} | S )zGPipeline for English text, including number and abbreviation expansion.)rD   r@   r<   r9   rC   r;   r   r   r   Úenglish_cleanersS   s   rG   c                 C   s   t  dd| ¡} | S )Nz[\(\)\[\]\<\>\"]+Ú ©r   r5   r;   r   r   r   Úremove_unnecessary_symbols^   s   rJ   c                 C   s<   t  dd| ¡} t  dd| ¡} t  dd| ¡} t  dd| ¡} | S )Nz\;ú,z\:z\-rA   z\&ÚandrI   r;   r   r   r   Úexpand_symbolsd   s
   rM   c                 C   r>   r3   )Úupperr;   r   r   r   Ú	uppercasem   s   rO   c                 C   sD   t | ƒ} t| ƒ} t| ƒ} t| ƒ} t| ƒ} t| ƒ} t| ƒ} t| ƒ} | S )zNCustom pipeline for English text, including number and abbreviation expansion.)rD   r@   r<   r9   rM   rJ   rO   rC   r;   r   r   r   Úcustom_english_cleanersr   s   rP   )Ú__doc__r   r   Únumbersr   r   rB   r4   r9   r<   r@   rC   rD   rE   rF   rG   rJ   rM   rO   rP   r   r   r   r   Ú<module>   s$    
	