o
    
i                     @   sV  d Z ddlZddlmZ ddlmZ dd edD Zedd	d
dddZdZ	e
dZdefddZe
dZdefddZe
dZdefddZe
dZdd Ze
dZe
de	 Ze
dZdefdd Zdefd!d"Ze
d#Zdefd$d%Zd2d'ed(edee fd)d*Zd'edefd+d,Zd3d'edefd.d/Zd'edefd0d1ZdS )4um   
Rules to verbalize numbers into Chinese characters.
https://zh.wikipedia.org/wiki/中文数字#現代中文
    N)OrderedDict)Listc                 C   s   i | ]	\}}t ||qS  )str).0itranr   r   O/home/ubuntu/.local/lib/python3.10/site-packages/misaki/zh_normalization/num.py
<dictcomp>   s    r
   u   零一二三四五六七八九u   十u   百u   千u   万u   亿)               u  (封|艘|把|目|套|段|人|所|朵|匹|张|座|回|场|尾|条|个|首|阙|阵|网|炮|顶|丘|棵|只|支|袭|辆|挑|担|颗|壳|窠|曲|墙|群|腔|砣|座|客|贯|扎|捆|刀|令|打|手|罗|坡|山|岭|江|溪|钟|队|单|双|对|出|口|头|脚|板|跳|枝|件|贴|针|线|管|名|位|身|堂|课|本|页|家|户|层|丝|毫|厘|分|钱|两|斤|担|铢|石|钧|锱|忽|(千|毫|微)克|毫|厘|(公)分|分|寸|尺|丈|里|寻|常|铺|程|(千|分|厘|毫|微)米|米|撮|勺|合|升|斗|石|盘|碗|碟|叠|桶|笼|盆|盒|杯|钟|斛|锅|簋|篮|盘|桶|罐|瓶|壶|卮|盏|箩|箱|煲|啖|袋|钵|年|月|日|季|刻|时|周|天|秒|分|小时|旬|纪|岁|世|更|夜|春|夏|秋|冬|代|伏|辈|丸|泡|粒|颗|幢|堆|条|根|支|道|面|片|张|颗|块|元|(亿|千万|百万|万|千|百)|(亿|千万|百万|万|千|百|美|)元|(亿|千万|百万|万|千|百|十|)吨|(亿|千万|百万|万|千|百|)块|角|毛|分|(公(里|引|丈|尺|寸|分|釐)))z(-?)(\d+)/(\d+)returnc                 C   sP   |  d}|  d}|  d}|rdnd}t|}t|}| | d| }|S )A
    Args:
        match (re.Match)
    Returns:
        str
    r   r   r      负 u   分之groupnum2str)matchsign	nominatordenominatorresultr   r   r	   replace_frac%   s   


r   z(-?)(\d+(\.\d+)?)%c                 C   s:   |  d}|  d}|rdnd}t|}| d| }|S )r   r   r   r   r   u	   百分之r   )r   r   percentr   r   r   r	   replace_percentage:   s   

r   z(-)(\d+)c                 C   s8   |  d}|  d}|rdnd}t|}| | }|S )r   r   r   r   r   r   )r   r   numberr   r   r   r	   replace_negative_numN   s   

r    z\d{3}\d*c                 C   s   |  d}t|ddS )r   r   T)alt_one)r   verbalize_digit)r   r   r   r   r	   replace_default_numb   s   
r#   z(-?)((\d+)(\.\d+))|(\.(\d+))u   (\d+)([多余几\+])?z(-?)((\d+)(\.\d+)?)|(\.(\d+))c                 C   sR   |  d}|  d}|dkrd}|r|nd}|  d}t|}| | | }|S )r   r   r   +u   多r   r   r   )r   r   match_2quantifiersr   r   r   r	   replace_positive_quantifieru   s   


r'   c                 C   sR   |  d}|  d}|  d}|rt|}|S |rdnd}t|}| | }|S )r   r   r      r   r   r   )r   r   r   pure_decimalr   r   r   r	   replace_number   s   


r*   zB((-?)((\d+)(\.\d+)?)|(\.(\d+)))[-~]((-?)((\d+)(\.\d+)?)|(\.(\d+)))c                 C   s@   |  d|  d}}tt|}tt|}| d| }|S )r   r   r   u   到)r   	RE_NUMBERsubr*   )r   firstsecondr   r   r   r	   replace_range   s
   r/   Tvalue_stringuse_zeroc                    s   |  d t dkrg S t dkr*|r%t t| k r%td t  gS t  gS t fddtt D }| d |  }| | d  }t|t| g t| S )N0r   r   c                 3   s     | ]}|t  k r|V  qd S )N)len)r   powerstrippedr   r	   	<genexpr>   s    z_get_value.<locals>.<genexpr>)lstripr3   DIGITSnextreversedUNITSkeys
_get_value)r0   r1   largest_unit
first_partsecond_partr   r5   r	   r>      s   


r>   c                 C   sp   | sdS |  d} t| dkrtd S t| }t|dkr3|d td kr3|d td kr3|dd  }d|S )Nr   r2   r   r   1r   )r8   r3   r9   r>   r<   join)r0   result_symbolsr   r   r	   verbalize_cardinal   s   

rE   Fc                 C   s,   dd | D }d |}|r|dd}|S )Nc                 S   s   g | ]}t | qS r   )r9   )r   digitr   r   r	   
<listcomp>   s    z#verbalize_digit.<locals>.<listcomp>r   u   一u   幺)rC   replace)r0   r!   rD   r   r   r   r	   r"      s
   
r"   c                 C   s   |  d}t|dkr|d }d}nt|dkr|\}}ntd|  dt|}|d}|r>|r4|nd	}|d
t| 7 }|S )N.r   r   r   r   zThe value string: '$z ' has more than one point in it.r2   u   零u   点)splitr3   
ValueErrorrE   rstripr"   )r0   integer_decimalintegerdecimalr   r   r   r	   r      s   



r   )T)F) __doc__recollectionsr   typingr   	enumerater9   r<   COM_QUANTIFIERScompileRE_FRACr   r   RE_PERCENTAGEr   
RE_INTEGERr    RE_DEFAULT_NUMr#   RE_DECIMAL_NUMRE_POSITIVE_QUANTIFIERSr+   r'   r*   RE_RANGEr/   boolr>   rE   r"   r   r   r   r   r	   <module>   sD   





