o
    :i                     @   s\   d dl Z d dlmZmZ d dlZd dlZG dd deZG dd deZG dd deZdS )	    N)ABCabstractmethodc                   @   s&   e Zd ZdZededefddZdS )TextNormalizerzFAbstract base class for text normalization, defining common interface.textreturnc                 C   s   t )Normalize text.)NotImplementedErrorselfr    r   4/home/ubuntu/LuxTTS/zipvoice/tokenizer/normalizer.py	normalize   s   zTextNormalizer.normalizeN)__name__
__module____qualname____doc__r   strr   r   r   r   r   r      s    r   c                   @   sz   e Zd ZdZdd ZdedefddZdd	 Zd
d Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd ZdS )EnglishTextNormalizerz
    A class to handle preprocessing of English text including normalization. Following:
    https://github.com/espnet/espnet_tts_frontend/blob/master/tacotron_cleaner/cleaners.py
    c                 C   s   dd dD | _ t | _td| _td| _td| _td| _	td| _
td	| _td
| _td| _td| _d S )Nc                 S   s*   g | ]}t d |d  t j|d fqS )z\b%s\br      )recompile
IGNORECASE).0xr   r   r   
<listcomp>   s    z2EnglishTextNormalizer.__init__.<locals>.<listcomp>))mrsmisess)mrmister)drdoctor)stsaint)cocompany)jrjunior)majmajor)gengeneral)drsdoctors)revreverend)lt
lieutenant)hon	honorable)sgtsergeant)captcaptain)esqesquire)ltdlimited)colcolonel)ftfort)etcz	et cetera)btwz
by the wayz([0-9][0-9\,]+[0-9])z([0-9]+\.[0-9]+)z([0-9\.\,]*[0-9]+%)u   £([0-9\,]*[0-9]+)z\$([0-9\.\,]*[0-9]+)z([0-9]+)/([0-9]+)z[0-9]+(st|nd|rd|th)z[0-9]+z\s+)_abbreviationsinflectengine_inflectr   r   _comma_number_re_decimal_number_re_percent_number_re
_pounds_re_dollars_re_fraction_re_ordinal_re
_number_re_whitespace_re)r
   r   r   r   __init__   s   
zEnglishTextNormalizer.__init__r   r   c                 C   s   |  |}| |}|S )zVCustom pipeline for English text,
        including number and abbreviation expansion.)expand_abbreviationsnormalize_numbersr	   r   r   r   r   >   s   

zEnglishTextNormalizer.normalizec                 C   s   |dkr
|dkr
dS |dkr|dkrdS |dkr"d| j | d S |dkr0d| j | d S d| j | d | j | j | d S )	Nr      z
 one half    z one quarter  z halves z
 quarters )rD   number_to_wordsordinal)r
   	numeratordenominatorr   r   r   fraction_to_wordsF   s$   
z'EnglishTextNormalizer.fraction_to_wordsc                 C      | dddS )Nr   , groupreplacer
   mr   r   r   _remove_commasW      z$EnglishTextNormalizer._remove_commasc                 C   s   | d}|d}t|dkrd| d S |d r t|d nd}t|dkr2|d r2t|d nd}|rP|rP|dkr>dnd}|dkrFd	nd
}d||||f S |r`|dkrXdnd}d||f S |rp|dkrhd	nd
}d||f S dS )Nr   .rQ   rS   z	 dollars r   dollardollarscentcentsz %s %s, %s %s z %s %s z zero dollars )r]   splitlenint)r
   r`   matchpartsre   rg   dollar_unit	cent_unitr   r   r   _expand_dollarsZ   s"   

$z%EnglishTextNormalizer._expand_dollarsc                 C   s(   t |d}t |d}| ||S )Nr   rQ   )rj   r]   rX   )r
   r`   rV   rW   r   r   r   _expand_fractionn   s   z&EnglishTextNormalizer._expand_fractionc                 C   rY   )Nr   rc   z point r\   r_   r   r   r   _expand_decimal_points   rb   z+EnglishTextNormalizer._expand_decimal_pointc                 C   rY   )Nr   %z	 percent r\   r_   r   r   r   _expand_percentv   rb   z%EnglishTextNormalizer._expand_percentc                 C   s   d| j |d d S )NrS   r   )rD   rT   r]   r_   r   r   r   _expand_ordinaly   s   z%EnglishTextNormalizer._expand_ordinalc                 C   s   t |d}|dkrM|dk rM|dkrdS |dkr)|dk r)d| j|d  d S |d dkr;d| j|d  d	 S d| jj|d
ddddd d S d| jj|d
d d S )Nr   i  i  i  z two thousand i  d   rS   z	 hundred r[   ohrQ   )andwordzeror]   z, )rw   )rj   r]   rD   rT   r^   )r
   r`   numr   r   r   _expand_number|   s$   z$EnglishTextNormalizer._expand_numberc                 C   s   t | j| j|}t | jd|}t | j| j|}t | j| j|}t | j	| j
|}t | j| j|}t | j| j|}t | j| j|}|S )Nz	\1 pounds)r   subrE   ra   rH   rI   ro   rJ   rp   rF   rq   rG   rs   rK   rt   rL   rz   r	   r   r   r   rP      s   z'EnglishTextNormalizer.normalize_numbersc                 C   s"   | j D ]\}}t|||}q|S )N)rA   r   r{   )r
   r   regexreplacementr   r   r   rO      s   z*EnglishTextNormalizer.expand_abbreviationsN)r   r   r   r   rN   r   r   rX   ra   ro   rp   rq   rs   rt   rz   rP   rO   r   r   r   r   r      s    'r   c                   @   s"   e Zd ZdZdedefddZdS )ChineseTextNormalizerzR
    A class to handle preprocessing of Chinese text including normalization.
    r   r   c                 C   s   t |d}|S )r   an2cn)cn2an	transformr	   r   r   r   r      s   zChineseTextNormalizer.normalizeN)r   r   r   r   r   r   r   r   r   r   r~      s    r~   )	r   abcr   r   r   rB   r   r   r~   r   r   r   r   <module>   s    	 