o
    ´©ið/  ã                   @   sd   d dl Zd dlmZ d dlZd dlZd dlZe dd¡G dd„ dƒƒZe dd¡G dd„ dƒƒZ	dS )é    N)ÚtablesÚprompt_classesÚMultiContextPromptc                   @   sp   e Zd Zdddddœdœddd	d
dœdœdœZ					ddd„Zdd„ Zdd„ Zdd„ Zdd„ Zddd„Z	dS )r   z®Please combine the context information provided below to complete the speech transcription task more accurately. If there is no relevant information, we will leave it blank.
ú)Historical transcription: {hist_context}
ú#One-pass result: {one_pass_result}
úHotword list: {hotwords}
©Úhist_contextÚone_pass_resultÚhotwords)ÚheaderÚfieldsu‚   è¯·ç»“åˆä¸‹é¢æä¾›çš„ä¸Šä¸‹æ–‡ä¿¡æ¯ï¼Œæ›´åŠ å‡†ç¡®åœ°å®Œæˆè¯­éŸ³è½¬å†™ä»»åŠ¡ã€‚å¦‚æžœæ²¡æœ‰ç›¸å…³ä¿¡æ¯ï¼Œæˆ‘ä»¬ä¼šç•™ç©ºã€‚
õ$   åŽ†å²è½¬å†™ç»“æžœï¼š{hist_context}
õ'   ä¸€éè§£ç ç»“æžœï¼š{one_pass_result}
õ   çƒ­è¯åˆ—è¡¨ï¼š{hotwords}
©ÚenÚzhTc           	      K   sÈ   || _ || _|| _|| _|| _|| _| dd¡}| dd¡}|r*|  |¡\| _| _	nd | _d| _	t
 d| j	› ¡ |rE|  |¡\| _| _nd | _d| _t
 d| j› ¡ | dd¡| _| d	d¡| _d S )
NÚchinese_hotwords_listÚ Úenglish_hotwords_listr   zchinese_hotwords_num: zenglish_hotwords_num: Úmax_neg_hotwords_numi„  Úmin_neg_hotwords_num)Úuse_histÚuse_one_pass_resultÚuse_hotwordsÚuse_asr_hotwordsÚuse_multi_lingual_promptÚkwargsÚgetÚget_hotwords_listr   Úchinese_hotwords_numÚloggingÚinfor   Úenglish_hotwords_numr   r   )	Úselfr   r   r   r   r   r   r   r   © r&   úh/home/ubuntu/.local/lib/python3.10/site-packages/funasr/datasets/fun_asr_datasets/multicontext_prompt.pyÚ__init__   s(   zMultiContextPrompt.__init__c                 C   sH   t |dƒ}| ¡  ¡  d¡}W d   ƒ n1 sw   Y  |t|ƒfS )NÚrÚ
)ÚopenÚreadÚstripÚsplitÚlen)r%   Úhotwords_fileÚfÚhotwords_listr&   r&   r'   r    >   s   ÿz$MultiContextPrompt.get_hotwords_listc           	      C   s   t |tƒr
d |¡}t d¡}t d¡}| |¡}| |¡}tdd„ |D ƒƒ}tdd„ |D ƒƒ}t|ƒ}|dkr:dS ||krF|| d	krFdS d
S )Nú u
   [ä¸€-é¿¿]+z	[A-Za-z]+c                 s   ó    | ]}t |ƒV  qd S ©N©r/   ©Ú.0Úmatchr&   r&   r'   Ú	<genexpr>R   ó   € z5MultiContextPrompt.detect_language.<locals>.<genexpr>c                 s   r4   r5   r6   r7   r&   r&   r'   r:   S   r;   r   r   g333333Ó?r   )Ú
isinstanceÚlistÚjoinÚreÚcompileÚfindallÚsumr/   )	r%   ÚtextÚchinese_patternÚenglish_patternÚchinese_matchesÚenglish_matchesÚchinese_lengthÚenglish_lengthÚtotal_lengthr&   r&   r'   Údetect_languageC   s    

ÿ


z"MultiContextPrompt.detect_languagec                 C   sv   |}g }| j dkrt| j t|ƒƒ}nt|ƒ}| j|k r&tj | j|d ¡}n|}|dkr7tjj||dd ¡ }||fS )Néÿÿÿÿé   r   F©Úreplace)	r   Úminr/   r   ÚnpÚrandomÚrandintÚchoiceÚtolist)r%   r   r2   Úselected_hotwordsr   Úselected_hotwords_numr&   r&   r'   Úhotwords_sampling_   s   

z$MultiContextPrompt.hotwords_samplingc                 C   sd  | j | }|d }g }| jr"| d¡r"| |d d j|d d¡ | jr9| d¡r9| |d d j|d d¡ d }| jrG| d¡rG|d }| jrS| d¡rS|d }|d ur¡|d	kr¡|  |¡}|d
krh| j	}n| j
}|d urw|  |¡\}}	ng }t|tƒs„| d¡}
n|}
|
| }t |¡ d |¡}| |d d j|d¡ |r¬|d	 |¡7 }|S |d7 }|S )Nr   r	   r   ©r	   r
   ©r
   r   Úasr_hotwordsr   r   ú, ©r   ú


)ÚCONTEXT_TEMPLATESr   r   ÚappendÚformatr   r   r   rK   r   r   rX   r<   r=   r.   rR   Úshuffler>   ©r%   ÚitemÚlanguageÚtemplateÚpromptÚcontext_linesr   Úneg_hotwordsÚselected_neg_hotwordsÚselected_neg_hotwords_numÚpos_hotwordsr&   r&   r'   Ú
get_promptr   s@   




þzMultiContextPrompt.get_promptr   c                 C   s€  | j | }|d }g }| jr"| d¡r"| |d d j|d d¡ | jr9| d¡r9| |d d j|d d¡ d }| jrG| d¡rG|d }| jrS| d¡rS|d }|d ur¯|d	kr¯td
|› ƒ |  	|¡}|dkro| j
}n| j}|d ur~|  |¡\}}	ng }t|tƒs‹| d¡}
n|}
|
| }td|	› ƒ t |¡ d |¡}| |d d j|d¡ |rº|d	 |¡7 }|S |d7 }|S )Nr   r	   r   rY   r
   rZ   r   r[   r   z
hotwords: r   r\   zselected_neg_hotwords_num: r]   r^   )r_   r   r   r`   ra   r   r   r   ÚprintrK   r   r   rX   r<   r=   r.   rR   rb   r>   rc   r&   r&   r'   Úget_inference_promptŸ   sD   




þz'MultiContextPrompt.get_inference_promptN)TTTTT)r   )
Ú__name__Ú
__module__Ú__qualname__r_   r(   r    rK   rX   rm   ro   r&   r&   r&   r'   r      s2    ýþ	ýþ÷
û!-ÚMultiContextPromptNewc                   @   sb   e Zd Zddddddœdœdd	d
dddœdœdœZ				ddd„Zdd„ Zdd„ Zddd„ZdS )rs   z Please combine the context information to complete the speech transcription task more accurately. If there is no relevant information, we will leave it blank.

z**Context:**
r   r   r   r   )r   Úcontext_headerr   ut   è¯·ç»“åˆä¸Šä¸‹æ–‡ä¿¡æ¯ï¼Œæ›´åŠ å‡†ç¡®åœ°å®Œæˆè¯­éŸ³è½¬å†™ä»»åŠ¡ã€‚å¦‚æžœæ²¡æœ‰ç›¸å…³ä¿¡æ¯ï¼Œæˆ‘ä»¬ä¼šç•™ç©ºã€‚

u   **ä¸Šä¸‹æ–‡ä¿¡æ¯ï¼š**
r   r   r   r   Tc                 K   sF   || _ || _|| _|| _| dd¡| _| dd¡| _| dd¡| _d S )NÚuse_full_hotwords_ratiogš™™™™™É?Úmax_hotwords_numrL   Úmin_hotwords_numé   )r   r   r   r   r   ru   rv   rw   )r%   r   r   r   r   r   r&   r&   r'   r(   æ   s   zMultiContextPromptNew.__init__c                 C   sv   |  d¡}| jdkrt| jt|ƒƒ}nt|ƒ}| j|k r'tj | j|d ¡}n|}tjj||dd}d 	|¡}||fS )Nr\   r   rM   FrN   )
r.   rv   rP   r/   rw   rQ   rR   rS   rT   r>   )r%   r   r2   rv   rW   rV   r&   r&   r'   rX   õ   s   



z'MultiContextPromptNew.hotwords_samplingc                 C   sæ   | j | }|d }g }| jr"| d¡r"| |d d j|d d¡ | jr9| d¡r9| |d d j|d d¡ | jrd| d¡rd|d }tj 	¡ | j
k rP|}n|  |¡\}}| |d d j|d¡ |rq||d	 d
 |¡ 7 }|S )Nr   r	   r   rY   r
   rZ   r   r]   rt   r   )r_   r   r   r`   ra   r   r   rQ   rR   Úrandru   rX   r>   )r%   rd   re   rf   rg   rh   r   rW   r&   r&   r'   rm     s    
z MultiContextPromptNew.get_promptr   c                 C   sª   | j rtj ¡ dk rdnd}| j| }|d }g }|r(| |d d j|d¡ |r7| |d d j|d	¡ |rF| |d d
 j|d¡ |rS||d d |¡ 7 }|S )Ng      à?r   r   r   r   r	   rY   r
   rZ   r   r]   rt   r   )r   rQ   rR   ry   r_   r`   ra   r>   )r%   r	   r
   r   re   rf   rg   rh   r&   r&   r'   ro   !  s   
z*MultiContextPromptNew.get_inference_promptN)TTTT)r   r   r   )rp   rq   rr   r_   r(   rX   rm   ro   r&   r&   r&   r'   rs   Ï   s0    ýý
ýýö
ü)
ÚnumpyrQ   Úfunasr.registerr   r"   rR   r?   Úregisterr   rs   r&   r&   r&   r'   Ú<module>   s    
 
G