o
    
i                     @   s  d dl Z d dlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlm	Z	 dd	lm
Z
 dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlm Z  ddl!m"Z" ddl!m#Z# dd l!m$Z$ dd!l!m%Z% dd"l!m&Z& dd#l'm(Z( dd$l'm)Z) dd%l'm*Z* G d&d' d'Z+dS )(    N)List   )tranditional_to_simplified)RE_DATE)RE_DATE2)RE_TIME)RE_TIME_RANGE)replace_date)replace_date2)replace_time)F2H_ASCII_LETTERS)
F2H_DIGITS)	F2H_SPACE)RE_DECIMAL_NUM)RE_DEFAULT_NUM)RE_FRAC)
RE_INTEGER)	RE_NUMBER)RE_PERCENTAGE)RE_POSITIVE_QUANTIFIERS)RE_RANGE)replace_default_num)replace_frac)replace_negative_num)replace_number)replace_percentage)replace_positive_quantifier)replace_range)RE_MOBILE_PHONE)RE_NATIONAL_UNIFORM_NUMBER)RE_TELEPHONE)replace_mobile)replace_phone)RE_TEMPERATURE)replace_measure)replace_temperaturec                   @   sf   e Zd Zdd Zddedee fddZdedefd	d
ZdedefddZdedee fddZ	dS )TextNormalizerc                 C   s   t d| _d S )Nu&   ([：、，；。？！,;?!][”’]?))recompileSENTENCE_SPLITORself r,   ^/home/ubuntu/.local/lib/python3.10/site-packages/misaki/zh_normalization/text_normalization.py__init__6   s   zTextNormalizer.__init__zhtextreturnc                 C   sR   |dkr| dd}tdd|}| jd|}| }dd td|D }|S )	zSplit long text into sentences with sentence-splitting punctuations.
        Args:
            text (str): The input text.
        Returns:
            List[str]: Sentences.
        r/     u2   [——《》【】<=>{}()（）#&@“”^_|…\\]z\1\nc                 S   s   g | ]}|  qS r,   )strip).0sentencer,   r,   r-   
<listcomp>G   s    z)TextNormalizer._split.<locals>.<listcomp>z\n+)replacer'   subr)   r4   split)r+   r0   lang	sentencesr,   r,   r-   _split9   s   zTextNormalizer._splitr6   c                 C   s&  | dd}| dd}| dd}| dd}| dd	}| d
d}| dd}| dd}| dd}| dd}| dd}| dd}| dd}| dd}| dd}| dd d d}| d!d" d#d"}| d$d%}| d&d'}| d(d)}| d*d+ d,d+}| d-d.}| d/d0}| d1d2 d3d2}| d4d5}| d6d7}| d8d9 d:d9}| d;d<}| d=d> d?d>}| d@dA}| dBdC dDdC dEdC}| dFdG}| dHdI}| dJdK dLdK}| dMdN}| dOdP dQdP}| dRdS dTdS}tdUdV|}|S )WN/u   每~u   至u   ～u   ①u   一u   ②u   二u   ③u   三u   ④u   四u   ⑤u   五u   ⑥u   六u   ⑦u   七u   ⑧u   八u   ⑨u   九u   ⑩u   十u   αu	   阿尔法u   βu   贝塔u   γu   伽玛u   Γu   δu	   德尔塔u   Δu   εu   艾普西龙u   ζu   捷塔u   ηu   依塔u   θu   西塔u   Θu   ιu	   艾欧塔u   κu   喀帕u   λu	   拉姆达u   Λu   μu   缪u   νu   拗u   ξu   克西u   Ξu   οu   欧米克伦u   πu   派u   Πu   ρu   肉u   ςu	   西格玛u   Σu   σu   τu   套u   υu   宇普西龙u   φu   服艾u   Φu   χu   器u   ψu   普赛u   Ψu   ωu	   欧米伽u   Ωu3   [-——《》【】<=>{}()（）#&@“”^_|…\\]r3   )r8   r'   r9   r+   r6   r,   r,   r-   _post_replaceJ   sR   zTextNormalizer._post_replacec                 C   s   t |}|ttt}tt|}tt	|}t
t|}tt|}tt|}t|}tt|}tt|}tt|}tt|}tt|}tt|}tt|}tt|}tt |}t!t"|}t#t|}| $|}|S )N)%r   	translater   r   r   r   r9   r	   r   r
   r   r   r   r#   r%   r$   r   r   r   r   r   r!   r    r"   r   r   r   r   r   r   r   r   r   r   r   r   rA   r@   r,   r,   r-   normalize_sentenceu   s6   

z!TextNormalizer.normalize_sentencec                    s      |} fdd|D }|S )Nc                    s   g | ]}  |qS r,   )rC   )r5   sentr*   r,   r-   r7      s    z,TextNormalizer.normalize.<locals>.<listcomp>)r=   )r+   r0   r<   r,   r*   r-   	normalize   s   
zTextNormalizer.normalizeN)r/   )
__name__
__module____qualname__r.   strr   r=   rA   rC   rE   r,   r,   r,   r-   r&   5   s    +"r&   ),r'   typingr   char_convertr   
chronologyr   r   r   r   r	   r
   r   	constantsr   r   r   numr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   	phonecoder   r   r    r!   r"   
quantifierr#   r$   r%   r&   r,   r,   r,   r-   <module>   sJ   