o
    
i$                  
   @   s&  d dl mZ ddlZddlmZ ddlmZ ddlmZ	 ddl
mZ ddl
mZ ddl
mZ dd	l
mZ dd
lmZ d dlmZ g dZeg d7 Zg dZi dddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0i d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRi dSdTdUdVdWdXdYdZd[d\d]d^d_d`dadbdcdddedfdgdhdidjdkdldmdndodpdqdrdsdtdudvdwdxdydzd{d|d}Zd~D ]ZeevsJ eeee< qG dd dZdS )   )MToken    N)
itemgetter)List)lazy_pinyin)load_phrases_dict)load_single_dict)Style)large_pinyin)
ToneSandhi)bpmfdtnlgkhzhchshrzcsjqx)yw )012345r   u   ㄅr   u   ㄆr   u   ㄇr   u   ㄈr   u   ㄉr   u   ㄊr   u   ㄋr   u   ㄌr   u   ㄍr   u   ㄎr   u   ㄏr   u   ㄐr   u   ㄑr    u   ㄒr   u   ㄓr   u   ㄔr   u   ㄕr   u   ㄖr   u   ㄗr   u   ㄘr   u   ㄙau   ㄚou   ㄛeu   ㄜieu   ㄝaiu   ㄞeiu   ㄟaou   ㄠouu   ㄡanu   ㄢenu   ㄣangu   ㄤengu   ㄥeru   ㄦiu   ㄧuu   ㄨvu   ㄩiiu   ㄭiiiu   十veu   月iau   压ianu   言iangu   阳iaou   要inu   阴ingu   应iongu   用iouu   又ongu   中uau   穵uaiu   外u   万u   王u   为u   文u   瓮u   我u   元u   云)uanuangueiuenuenguovanvnu   ;:,.!?/—…"()“” 12345Rc                   @   s   e Zd ZdddZdd Zdedeee  fdd	Zd
ee dee dededeee  f
ddZddede	dee
 fddZdS )
ZHFrontend   ❓c                 C   s  || _ td| _i ddgdgdggddgdgdggd	d
gdgdggddgdggddgdggddgdggddgdgdggddgdgdggddgdggddgdggdd ggd!d"ggd#d$ggd%d&ggd'd(ggd)d*gd+gg| _h d,| _h d-| _t | _|   d S ).Nu   ;:,.!?—…"()“”u	   开户行ka1ihu4hang2u	   发卡行fa4ka3u	   放款行fa4ngkua3nu   茧行jia3nu   行号ha4ou   各地ge4di4u	   借还款jie4hua2nu	   时间为shi2jia1nwe2iu   为准zhu3nu   色差se4cha1u   嗲dia3u   呗bei5u   不bu4u   咗zuo5u   嘞lei5u   掺和chan1huo5>      范儿	   妥妥儿	   媳妇儿	   小院儿	   撒欢儿	   老汉儿	   胡同儿   寻老礼儿>,      一儿   为儿   乞儿   侄儿   俺儿   可儿   女儿   妻儿   婴儿   孙儿   孤儿   少儿   幼儿   患儿   我儿   护儿   拐儿   救儿   替儿   有儿   狗儿   猪儿   猫儿   男儿   瞒儿   祖儿   美儿   聋儿   舫儿   花儿   虐儿   虫儿   马儿   鸟儿	   体弱儿	   侄孙儿	   婴幼儿	   应采儿	   流浪儿	   混血儿	   红孩儿	   脑瘫儿	   蜜雪儿	   连体儿)	unk	frozensetpuncphrases_dict
must_erhua	not_erhuar   tone_modifier_init_pypinyin)selfr    r   F/home/ubuntu/.local/lib/python3.10/site-packages/misaki/zh_frontend.py__init__-   sN   
	


zZHFrontend.__init__c                 C   s&   t   t| j ttddi dS )z+
        Load pypinyin G2P module.
        u   地zde,di4N)r
   loadr   r   r   ordr   r   r   r   r   Q   s   
zZHFrontend._init_pypinyinwordreturnc           
      C   s   g }g }t |dtjd}t |dtjd}dd t|D }|D ]}d||< qt||D ]+\}}	td|	rL|dv rAtdd	|	}	n|d
v rLtdd|	}	|	| |	|	 q+||fS )z@
        Get word initial and final by pypinyin or g2pM
        T)neutral_tone_with_fivestylec                 S   s   g | ]
\}}|d kr|qS )u   嗯r   ).0indexr   r   r   r   
<listcomp>e   s    z3ZHFrontend._get_initials_finals.<locals>.<listcomp>n2zi\d)r   r   r   r7   r:   )r   r   r   r   r;   )
r   r	   INITIALSFINALS_TONE3	enumerateziprematchsubappend)
r   r   initialsfinalsorig_initialsorig_finalsen_indexr7   r   r9   r   r   r   _get_initials_finalsZ   s(   

zZHFrontend._get_initials_finalsr   r   posc           	      C   s(  t |D ]\}}|t|d kr|| dkr|dkrd||< q|| jvr1|| jv s-|dv r1||fS t|t|kr=||fS t|t|ksGJ g }g }t |D ]@\}}|t|d kr|| dkr|dv r|dd | jvr|r|d	 dd	 d
 |d	 d	  |d	< qO|||  || qO||fS )z
        Do erhub.
        r   u   儿er1er2>   r*   r   nr>   r   er5NR)r   lenr   r   r   )	r   r   r   r   r   r7   phnnew_initials
new_finalsr   r   r   _merge_erhuav   s&   	$$&zZHFrontend._merge_erhuaTtext
with_erhuac                    s  g }t |} j|}g }g }|D ]\}}|dkr*dt|kr*t|dkr*d}n|dkr5| jv r5d}t||dd}	|dv rb| sV|dkrP| jv rP||	_	|
|	 n|ra|d  j|7  _q|ru|d jdvru|d jsud	|d _ |\}
} j|||}|r |
|||\}
}|
|
 |
| g }t|
|D ]\}}|r|
| |r| jvs||kr|
| qd
|dddd}tdd
|d
}d fdd|D |	_	|
|	 qd fdd|D }||fS )z`
        Return: list of list phonemes.
            [['w', 'o3', 'm', 'en2', ' '], ...]
        r    u   一u   鿿X )r   tag
whitespace)r    r5   r   /__eR_err   _Rz(?=\d)c                 3   s    | ]
}t | jV  qd S N)ZH_MAPgetr   )r   r   r   r   r   	<genexpr>   s    z&ZHFrontend.__call__.<locals>.<genexpr>c                 3   s,    | ]}|j d u r jn|j |j V  qd S r   )phonemesr   r   )r   tkr   r   r   r      s   * )psglcutr   pre_merge_for_modifyminmaxr   r   isspacer   r   r   r   r   modified_toner   r   joinreplacer   r   split)r   r   r   tokensseg_cutr   r   r   r   r   sub_initials
sub_finalsphonesr   r9   resultr   r   r   __call__   sX   
 





zZHFrontend.__call__N)rQ   )T)__name__
__module____qualname__r   r   strr   r   r   boolr   r   r   r   r   r   rP   ,   s     
$	

 &rP   )tokenr   r   operatorr   typingr   jieba.possegpossegr   pypinyinr   r   r   r	    pypinyin_dict.phrase_pinyin_datar
   tone_sandhir   r   TONESr   r   rP   r   r   r   r   <module>   s(   V 
