o
    iE1                     @   s  d dl Z d dlZd dlmZmZmZ defddZdeee ef fddZdeee ef fd	d
Z	d>dee dee dee fddZ
d>dee dee fddZdd ZddddddddZdddddd dd d!Zd"d"d"d"d"d"d#Zi d$d%d&dd'dd(dd)dd*dd+dd,dd-dd.dd/dd0dd1dd2dd3dd4dd5ddddd dd6ddddddd7Zh d8Zh d9Zd:d; Zd<d= ZdS )?    N)AnyListUnionchc                 C   s<   d|   kr
dksn d|   krdksn | dkrdS dS )Nu   一u   鿿09@TF )r   r	   r	   R/home/ubuntu/.local/lib/python3.10/site-packages/funasr/utils/postprocess_utils.py	isChinese   s   4r   wordc                 C   s   g }| D ]%}| dd}| dd}| dd}| dd}| dd}|| qt|dkr2dS |D ]}t|du r? dS q4d	S )
N  </s><s><unk><OOV>r   FT)replaceappendlenr   r   
word_listsicurr   r	   r	   r
   isAllChinese   s   r   c                 C   s   g }| D ]%}| dd}| dd}| dd}| dd}| dd}|| qt|dkr2dS |D ]}| du rC|d	krC dS | d
u rRt|d
u rR dS q4d
S )Nr   r   r   r   r   r   r   F'T)r   r   r   isalphar   r   r	   r	   r
   
isAllAlpha!   s"   r   words
time_stampreturnc                 C   s  t | }g }g }g }d}g }g }d}	t|D ]}
|
|krqt | |
 dkr| |
 d r|
d |k r| |
d  dkr|
d |k rt | |
d  dkr| |
d  d r||
 |
d7 }
||
 	 |
d7 }
|
|k r| |
 dkr|
d7 }
|
|k rt | |
 dkr| |
 d r|  ||
 |
}nnnqfqt|D ]}
| |
 dkr||	 q||	 |	d7 }	qd}t|D ]}
|
|krq|
|v r2|d ur|||
  d }| |
  }|
d7 }
|
|k r|
|v r|| |
  7 }|
}n| |
 d r|| |
  7 }|
d7 }
|
|k s|| |d ur1||
 t |k r1|||
  d }|||g q|| |
  |d urg||
 t |k rg| |
 dkrg|||
  d }|||
  d }|||g |}q|d urq||fS |S )Nr      utf-8r      )r   rangeencoder   r   popupper)r   r   
words_sizer   
abbr_beginabbr_endlast_numts_liststs_numsts_indexnumbegin	abbr_wordendr	   r	   r
   abbr_dispose8   s   "







	*
r4   c                 C   s  g }g }d}g }| D ]}d}t |tr|}n|d}|dv r q
|| q
t|rCt|D ]\}}||dd q.|d urB|}nt|rd}	t|D ]X\}}|	rc|d urc|| d }
|| d }d}d|v r|dd}||7 }|d urd	}	|| d }qM||7 }|| |d d}|d urd}	|| d }||
|g |}
qMnd	}d}	d
}
d
}t|D ]\}}|	r|d ur|| d }
|| d }d}t|r|du r|  || d	}|d urd}	||
|g |}
qd|v r|dd}||7 }d	}|d urd	}	|| d }qt|r;||7 }|| |d d}d}|d ur:d}	|| d }||
|g |}
q|| q|d urkt	||\}}g }|D ]}|dkr]|| qQd
| }|||fS t	|}g }|D ]}|dkr|| qsd
| }||fS )Nr   r#   r   r   r   r   r   Tr   r"   z@@Fr!   )
isinstancestrdecoder   r   	enumerater   r   r'   r4   joinstrip)r   r   middle_listsr   	word_itemr-   r   r   r   ts_flagr1   r3   alpha_blankreal_word_listssentencer	   r	   r
   sentence_postprocess   s   
















rB   c           	      C   sh  g }g }d}| D ]}d}t |tr|}n|d}|dv rq|| qt|D ]>\}}d}d|v rC|dkrCd}|dd}||7 }q(d|v rb|dkrb|| |d d}|dd}||7 }q(||7 }q(|d urp|| g }|D ]6}|dkr|dkr|dd}n |d	kr|d	d
}n|dkr|dd}n
|dkr|dd}|| qtd|}||fS )Nr   r#   r5   u   ▁r   r   r   Izi'mzI'mzi'vezI'vezi'llzI'll)r6   r7   r8   r   r9   r   r:   )	r   r<   r   r=   r   r   r   r@   rA   r	   r	   r
   "sentence_postprocess_sentencepiece   sR   









rD      😊   😔   😡r      😰   🤢   😮)	<|HAPPY|><|SAD|>	<|ANGRY|><|NEUTRAL|><|FEARFUL|><|DISGUSTED|><|SURPRISED|>   🎼   👏   😀   😭   🤧)<|BGM|>
<|Speech|><|Applause|><|Laughter|><|Cry|>
<|Sneeze|>
<|Breath|>	<|Cough|><|lang|>)<|zh|><|en|><|yue|><|ja|><|ko|><|nospeech|><|nospeech|><|Event_UNK|>   ❓r`   ra   rb   rc   rd   re   rK   rL   rM   rN   rW   rX   rY   rZ   rO   rP      😷)rQ   r[   z<|EMO_UNKNOWN|>r\   r]   r^   z<|Sing|>z<|Speech_Noise|>z<|withitn|>z	<|woitn|>z<|GBG|>z<|Event_UNK|>>   rE   rF   rG   rJ   rH   rI   >   rR   rS   rT   rU   rh   rV   c                 C   s   i }t D ]}| |||< | |d} qd}tD ]}|| || kr$|}qtD ]}|| dkr5t| |  } q'| t|  } ttD ]}| d| |} | |d |} qA|  S )Nr   rN   r   r   )	
emoji_dictcountr   emo_dict
event_dictemo_setunion	event_setr;   )s	sptk_dictsptkemoeemojir	   r	   r
   format_str_v2{  s$   rv   c                 C   s*  dd }dd }|  dd} tD ]}|  |d} qdd	 | dD }d
|d  }||}tdt|D ]V}t|| dkr?q4||| |krY||| d krY|| dd  ||< t|| dkrbq4||| }||| d kr||| ||kr|d d }|||   7 }q4| dd
}| S )Nc                 S      | d t v r
| d S d S )Nr!   )rm   rp   r	   r	   r
   get_emo     z/rich_transcription_postprocess.<locals>.get_emoc                 S   rw   )Nr   )ro   rx   r	   r	   r
   	get_event  rz   z1rich_transcription_postprocess.<locals>.get_eventrf   rg   r_   c                 S   s   g | ]	}t |d qS )r   )rv   r;   ).0s_ir	   r	   r
   
<listcomp>  s    z2rich_transcription_postprocess.<locals>.<listcomp>r   r   r"   r!   zThe.)r   	lang_dictsplitr%   r   r;   lstrip)rp   ry   r{   langs_listnew_scur_ent_eventr   r	   r	   r
   rich_transcription_postprocess  s*    $r   )N)stringloggingtypingr   r   r   r7   r   r   r   r4   rB   rD   rk   rl   r   ri   rm   ro   rv   r   r	   r	   r	   r
   <module>   s   $Xn5		
 
