o
    ´©iË{  ã                   @   s  d dl Z d dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ e dd¡G dd„ de jjƒƒZe dd	¡G d
d	„ d	e jjƒƒZe dd¡G dd„ de jjƒƒZe jjdefdd„ƒZe jjdefdd„ƒZG dd„ de jjƒZdd„ Zddd„Zdd„ Zdd„ ZdS )é    N)Útables)Ú	to_device)Úmake_pad_mask)ÚautocastÚpredictor_classesÚCifPredictorc                       s`   e Zd Z					d‡ fdd„	Z					dd	d
„Zddd„Z	ddejdejfdd„Z‡  Z	S )r   ç      ð?çš™™™™™¹?r   çÍÌÌÌÌÌÜ?c	           	         sx   t ƒ  ¡  tj ||fd¡| _tjj|||| d |d| _tj |d¡| _	tjj
|d| _|| _|| _|| _|| _d S )Nr   é   )Úgroups©Úp)ÚsuperÚ__init__ÚtorchÚnnÚConstantPad1dÚpadÚConv1dÚ
cif_conv1dÚLinearÚ
cif_outputÚDropoutÚdropoutÚ	thresholdÚsmooth_factorÚnoise_thresholdÚtail_threshold)	ÚselfÚidimÚl_orderÚr_orderr   r   r   r   r   ©Ú	__class__© úZ/home/ubuntu/.local/lib/python3.10/site-packages/funasr/models/paraformer/cif_predictor.pyr      s   

zCifPredictor.__init__Néÿÿÿÿc                 C   sÐ  t dƒÖ |}| dd¡}|  |¡}	|  |	¡}
|
| }|  |¡}| dd¡}t |¡}|  |¡}t |¡}tj	j
 || j | j ¡}|d urR| dd¡ ¡ }|| }|d urZ|| }| d¡}| d¡}|d urk|}n|d ury||k ¡  d¡}nd }| d¡}|d ur˜||| d d …d f  d| d¡¡9 }n| jdkr©| j||||d\}}}t||| jƒ\}}|d u rÓ| jdkrÓt |¡ tj¡ ¡ }|d d …d |…d d …f }W d   ƒ n1 sÝw   Y  ||||fS ©NFr   é   r'   éþÿÿÿç        ©Úmask)r   Ú	transposer   r   r   r   Úrelur   Úsigmoidr   Ú
functionalr   r   ÚfloatÚsqueezeÚsumÚrepeatÚsizer   Útail_process_fnÚcifr   ÚmaxÚtypeÚint32Úitem)r   ÚhiddenÚtarget_labelr-   Ú	ignore_idÚmask_chunk_predictorÚtarget_label_lengthÚhÚcontextÚqueriesÚmemoryÚoutputÚalphasÚtarget_lengthÚ	token_numÚacoustic_embedsÚcif_peakÚtoken_num_intr%   r%   r&   Úforward(   sL   










(
ÿ€Û'zCifPredictor.forwardc                 C   s  |  ¡ \}}}| j}|d urItj|dftj|jd}	t |	¡}
tj||	gdd}tj|
|gdd}|| }|| }tj||	gdd}t ||¡}ntj	|g|j
d |j¡}t |d¡}tj||gdd}tj|d|f|j
d |j¡}tj||gdd}|jdd}t |¡}|||fS ©Nr   ©ÚdtypeÚdevice©Údim©rP   )r   r   r'   )r6   r   r   ÚzerosÚfloat32rQ   Ú	ones_likeÚcatÚaddÚtensorrP   ÚtoÚreshaper4   Úfloor©r   r=   rG   rI   r-   ÚbÚtÚdr   Úzeros_tÚones_tÚmask_1Úmask_2rU   Útoken_num_floorr%   r%   r&   r7   [   s&   


zCifPredictor.tail_process_fnrG   Úencoder_sequence_lengthc                 C   ó"  |  ¡ \}}tj}| j}|rt tj|dd¡ |¡}nt tj|dd¡ |¡}t |¡ 	¡ }tj
|dd}	t |	¡ |¡}	|	d d …d d d …f  d|d¡}	tj||g|d}
tj
|
dd}
|
d d …d d …d f  dd|¡ |	j¡}
t t |	|
¡¡ |¡}| d¡}tj|ddd }t |d| ¡ ¡}t||d  |j¡}||9 }|d d …d d …d f  dd|¡}t |¡}t |¡}tj
|dd}||k}t |||¡}| tj¡}d| |¡ }tj|dd}| |¡}t|| ¡ d  |¡ |j¡}|| }|}| d¡ |j¡}| ¡ | ¡ fS ©Nr   rR   rT   r   r'   )Úmaxlenr)   ©r6   r   r;   ÚtrainingÚroundr4   r:   r]   r9   r<   Úcumsumr5   Úonesr[   rQ   Útrue_divideÚeqÚclampr   rW   Ú
zeros_likeÚwhereÚboolrP   Údetach©r   rG   rg   Ú
batch_sizeÚmaximum_lengthÚint_typeÚis_trainingrI   Úmax_token_numÚalphas_cumsumÚindexÚ	index_divÚindex_div_bool_zerosÚindex_div_bool_zeros_countÚtoken_num_maskÚindex_div_bool_zeros_count_tilero   rU   ÚcondÚ$index_div_bool_zeros_count_tile_boolÚ#index_div_bool_zeros_count_tile_outÚpredictor_maskÚpredictor_alignmentsÚpredictor_alignments_lengthr%   r%   r&   Úgen_frame_alignmentsr   óV    (

ÿÿ


ý
ÿz!CifPredictor.gen_frame_alignments)r   r	   r   r   r
   ©NNr'   NN©NN)
Ú__name__Ú
__module__Ú__qualname__r   rM   r7   r   ÚTensorrŠ   Ú__classcell__r%   r%   r#   r&   r      s(    ÷
ù
3ÿÿÿÚCifPredictorV2c                       sp   e Zd Z								d‡ fdd	„	Z	
	
		
	
ddd„Zddd„Zddd„Z	
ddejdejfdd„Z	‡  Z
S )r“   r   r	   r   r+   Ú	predictorúseq2seq/cifTc                    s†   t ƒ  ¡  tj ||fd¡| _tj |||| d ¡| _tj |d¡| _	tjj
|d| _|| _|| _|| _|| _|	| _|
| _|| _d S )Nr   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   Ú!tf2torch_tensor_name_prefix_torchÚtf2torch_tensor_name_prefix_tfÚ	tail_mask)r   r    r!   r"   r   r   r   r   r   r–   r—   r˜   r#   r%   r&   r   ®   s   

zCifPredictorV2.__init__Nr'   c                 C   sà  t dƒÞ |}| dd¡}|  |¡}	t |  |	¡¡}
|
 dd¡}
|  |
¡}
t |
¡}tjj	 || j
 | j ¡}|d urG| dd¡ ¡ }|| }|d urO|| }| d¡}| d¡}|d urc| d¡}n|d urq||k ¡  d¡}nd }| d¡}|d ur||| d d …d f  d| d¡¡9 }n!| jdkr±| jr¥| j||||d\}}}n| j|||d d\}}}t||| jƒ\}}|d u rÛ| jdkrÛt |¡ tj¡ ¡ }|d d …d |…d d …f }W d   ƒ n1 såw   Y  ||||fS r(   )r   r.   r   r   r/   r   r   r0   r   r1   r   r   r2   r3   r4   r5   r6   r   r˜   r7   Úcif_v1r   r9   r:   r;   r<   )r   r=   r>   r-   r?   r@   rA   rB   rC   rD   rF   rG   rH   rI   rJ   rK   rL   r%   r%   r&   rM   Ê   sN   







(
ÿÿ€Ù)zCifPredictorV2.forwardc                 K   s\  |  dd¡}|j\}}}|}| dd¡}	|  |	¡}
t |  |
¡¡}| dd¡}|  |¡}t |¡}tj	j
 || j | j ¡}| d¡}g }g }g }g }g }|d urwd|v rwd|d d …d |d d …f< |swd|d d …t|d d d… ƒd …f< |d ur¯d	|v r¯d
|v r¯t|d
 |jd|d
< t|d	 |jd|d	< tj|d
 |fdd}tj|d	 |fdd}|d urå|råtj|d|f|jd}tj| jgg|jd}t ||df¡}tj||fdd}tj||fdd}|jd }t|ƒD ]‘}d}tj||jd}g }g }t|ƒD ]N}|| | }|| | jk r'||7 }| |¡ |||| |  7 }q|| j| || |  7 }| |¡ ||7 }| |¡ || j8 }||| |  }q| |¡ |dkrc| || ¡ n| |¡ | tjt|ƒ|jd¡ | |¡ | |¡ qîtj|dd|d	< tj|d	 dd|d	< tj|dd|d
< tj|d
 dd|d
< t|ƒ}|dkr»|t |d¡d d fS g }t|ƒD ]6}tj|||  |f|jd}|| dkrß| |¡ qÁt || ¡||< | tj|| |fdd¡ qÁtj|dd|d	< tj|d	 dd|d	< tj|dd|d
< tj|d
 dd|d
< t |d¡t |d¡d d fS )NÚis_finalFr   r)   r'   Ú
chunk_sizer+   r   Ú
cif_alphasÚ
cif_hidden©rQ   rR   )Úaxis)ÚgetÚshaper.   r   r   r/   r   r   r0   r   r1   r   r   r3   r4   r   rQ   rX   rU   rZ   r   ÚtileÚranger   ÚappendÚlenÚstackÚ	unsqueezer9   )r   r=   ÚcacheÚkwargsrš   rx   Úlen_timeÚhidden_sizerB   rC   rD   rF   rG   Útoken_lengthÚ
list_firesÚlist_framesÚcache_alphasÚcache_hiddensÚtail_hiddenÚtail_alphasr_   Ú	integrateÚframesÚ
list_frameÚ	list_firer`   ÚalphaÚmax_token_lenÚlist_lsÚ
pad_framesr%   r%   r&   Úforward_chunkÿ   sš   



$









ÿ zCifPredictorV2.forward_chunkc                 C   s>  |  ¡ \}}}| j}|d urItj|dftj|jd}	t |	¡}
tj||	gdd}tj|
|gdd}|| }|| }tj||	gdd}t ||¡}n.tj	|g|j
d |j¡}t |d¡}|dkrntj|| |d¡gdd}n	tj||gdd}tj|d|f|j
d |j¡}tj||gdd}|jdd}t |¡}|||fS rN   )r6   r   r   rU   rV   rQ   rW   rX   rY   rZ   rP   r[   r\   r5   r4   r]   r^   r%   r%   r&   r7   Z  s*   


zCifPredictorV2.tail_process_fnrG   rg   c                 C   rh   ri   rk   rw   r%   r%   r&   rŠ   t  r‹   z#CifPredictorV2.gen_frame_alignments)r   r	   r   r   r+   r”   r•   TrŒ   ©Nr   )rŽ   r   r   r   rM   r»   r7   r   r‘   rŠ   r’   r%   r%   r#   r&   r“   ¬   s0    ô
ù
5
[ÿÿÿÚCifPredictorV2Exportc                       sR   e Zd Z‡ fdd„Zdejdejfdd„Zdejdejfdd„Zdd
d„Z‡  Z	S )r½   c                    sF   t ƒ  ¡  |j| _|j| _|j| _|j| _|j| _|j| _|j| _d S r¼   )	r   r   r   r   r   r   r   r   r   )r   Úmodelr©   r#   r%   r&   r   °  s   
zCifPredictorV2Export.__init__r=   r-   c                 C   s^   |   ||¡\}}| dd¡ ¡ }| d¡}| j|||d\}}}t||| jƒ\}}||||fS )Nr'   r*   r,   )Úforward_cnnr.   r2   r3   r7   Úcif_v1_exportr   )r   r=   r-   rG   rI   rJ   rK   r%   r%   r&   rM   »  s   
zCifPredictorV2Export.forwardc           	      C   s˜   |}|  dd¡}|  |¡}t |  |¡¡}|  dd¡}|  |¡}t |¡}tjj || j	 | j
 ¡}|  dd¡ ¡ }|| }| d¡}| d¡}||fS )Nr   r)   r'   r*   )r.   r   r   r/   r   r   r0   r   r1   r   r   r2   r3   r4   )	r   r=   r-   rB   rC   rD   rF   rG   rI   r%   r%   r&   r¿   È  s   




z CifPredictorV2Export.forward_cnnNc                 C   sØ   |  ¡ \}}}| j}tj|dftj|jd}	t |	¡}
tj||	gdd}tj|
|gdd}|| }|| }tj||	gdd}t ||¡}tj|d|f|j	d 
|j¡}tj||gdd}|jdd}t |¡}|||fS )Nr   rO   rR   rT   r'   )r6   r   r   rU   rV   rQ   rW   rX   rY   rP   r[   r4   r]   r^   r%   r%   r&   r7   Ý  s   


z$CifPredictorV2Export.tail_process_fnr   )
rŽ   r   r   r   r   r‘   rM   r¿   r7   r’   r%   r%   r#   r&   r½   ®  s    þ
ýþ
ýr   c                 C   s  | j }| j}|  ¡ \}}}tj|g|jd |j ¡}tj|||||d}tj||||d}	tj|dtjd tj	¡}
t 
|
¡}tj|
ddd}t 
|¡}d|d d …df< || }|dk}d|	|< |	|
 | }	tj| d¡ dd|f¡|  dd}|| }tj|ddd}| d¡}tj|dd}tj|ddd}d|d< d||< |	t 
|	¡ }||  d¡ d|f¡| |  }tj|ddd}d||< || | | }|jdd}t 
|¡ ¡ jtjd}tj|||||d}tj||d	 |d¡}|| d¡k }|||< ||	fS )
NrT   rO   r   ©rS   rP   ©Údimsr   r'   rR   rž   )rQ   rP   r6   r   rZ   r[   rU   rn   Úfloat64rV   r]   Úrollr§   r5   r4   r9   Úint64ÚarangeÚexpand)r=   rG   r   rQ   rP   rx   rª   r«   r´   ÚfiresÚ
prefix_sumÚprefix_sum_floorÚdislocation_prefix_sumÚdislocation_prefix_sum_floorÚdislocation_diffÚ	fire_idxsÚprefix_sum_hiddenÚshift_framesÚ	batch_lenÚ
batch_idxsÚshift_batch_idxsÚremainsÚremain_framesÚshift_remain_framesÚmax_label_lenÚframe_firesÚindicesÚframe_fires_idxsr%   r%   r&   rÀ   ó  sJ   ÿ

$
 rÀ   c              
   C   s  |   ¡ \}}}tj|g|jd |j¡}tj|g|j| jd}tj||g| j| jd}g }g }	t|ƒD ]}}
|d d …|
f }tj|g|j| jd| }||7 }| 	|¡ ||k}t 
||tj|g|j| jd |¡}t 
|||¡}|| }||d d …d f | d d …|
d d …f  7 }|	 	|¡ t 
|d d …d f  d|¡|d d …d f | d d …|
d d …f  |¡}q3t |d¡}t |	d¡}||k}t | ¡}|d|d f   d¡}t|ƒD ] }|||| f }|  d¡}|||d |…d d …f< ||krõ|}qÕ|d d …d |…d d …f }||fS )NrT   rO   r   r   )r6   r   rZ   rP   r[   rQ   rU   r£   ro   r¤   rt   r5   r¦   rs   )r=   rG   r   rx   rª   r«   r³   Úframer­   r®   r`   r·   Údistribution_completionÚ
fire_placeÚcurÚremaindsrÉ   r´   rÏ   rÙ   rØ   r_   Ú
frame_fireÚ	frame_lenr%   r%   r&   Ú
cif_export,  sN   ÿ
ý*
<ÿ

€rã   c                       s&   e Zd Zd‡ fdd„	Zdd„ Z‡  ZS )Úmae_lossFc                    s(   t t| ƒ ¡  || _tjjdd| _d S )Nr4   )Ú	reduction)r   rä   r   Únormalize_lengthr   r   ÚL1LossÚ	criterion)r   ræ   r#   r%   r&   r   c  s   zmae_loss.__init__c                 C   s8   |  d¡}| jr| ¡  tj¡}|  ||¡}|| }|S )Nr   )r6   ræ   r4   r:   r   rV   rè   )r   r¬   Úpre_token_lengthÚloss_token_normalizerÚlossr%   r%   r&   rM   h  s   
zmae_loss.forward©F)rŽ   r   r   r   rM   r’   r%   r%   r#   r&   rä   a  s    rä   c              
   C   s   |   ¡ \}}}tj|g| jd}tj||g| jd}g }g }	t|ƒD ]y}
|d d …|
f }tj|g| jd| }||7 }| |¡ ||k}t ||tj|g| jd |¡}t |||¡}|| }||d d …d f | d d …|
d d …f  7 }|	 |¡ t |d d …d f  d|¡|d d …d f | d d …|
d d …f  |¡}q"t 	|d¡}t 	|	d¡}g }t 
| d¡¡ ¡ }| ¡ }t|ƒD ];}||d d …f }t ||d d …d d …f dt ||k¡ ¡ ¡}tj||  d¡ |g| jd}| t ||gd¡¡ q¼t 	|d¡|fS )Nrž   r   r'   r   )r6   r   rU   rQ   r£   ro   r¤   rt   r5   r¦   rm   r4   Úintr9   Úindex_selectÚnonzeror3   rX   )r=   rG   r   rx   rª   r«   r³   rÜ   r­   r®   r`   r·   rÝ   rÞ   rß   rà   rÉ   r´   r¹   Ú
len_labelsrØ   r_   ÚfireÚlÚpad_lr%   r%   r&   r8   q  s@   
ÿ*
<ÿ.r8   Fc                 C   sÆ   |   ¡ \}}| j}| j}tj|g| jd | j¡}tj||||d}tj| dtjd tj	¡}t 
|¡}	tj|ddd}
t 
|
¡}d|d d …df< |	| }|dk}d||< || |	 }|ra||fS |S )NrT   rO   r   rÁ   rÂ   r   )r6   rQ   rP   r   rZ   r[   rU   rn   rÄ   rV   r]   rÅ   )rG   r   Úreturn_fire_idxsrx   rª   rQ   rP   rÉ   rÊ   rË   rÌ   rÍ   rÎ   rÏ   r%   r%   r&   Úcif_wo_hidden_v1œ  s&   ÿ

rõ   c                 C   sj  t ||dd\}}| j}| j}|  ¡ \}}}	tj|||	||d}
tj| d¡ dd|	f¡|  dd}|| }
tj	|
ddd}| 
d¡}tj|dd}tj	|ddd}d|d< d||< |t |¡ }||  d¡ d|	f¡| |  }tj	|ddd}d||< |
| | | }
t | 
d¡¡ ¡  ¡ }tj|||	||d}tj||d	 |d¡}|| d¡k }|
||< ||fS )
NT)rô   rO   r'   r   rR   r   rÂ   rž   )rõ   rQ   rP   r6   r   rU   rn   r§   r5   rÅ   r4   r]   rm   rí   r9   rÇ   rÈ   )r=   rG   r   rÉ   rÏ   rQ   rP   rx   rª   r«   r´   rÐ   rÑ   rÒ   rÓ   rÔ   rÕ   rÖ   r×   rØ   rÙ   rÚ   rÛ   r%   r%   r&   r™   ¸  s2   $
 ÿr™   c           
   	   C   sŽ   |   ¡ \}}tj|g| jd}g }t|ƒD ])}| d d …|f }||7 }| |¡ ||k}t ||tj|g| jd|  |¡}qt |d¡}	|	S )Nrž   r   )	r6   r   rU   rQ   r£   r¤   rt   ro   r¦   )
rG   r   rx   rª   r³   r­   r`   r·   rÞ   rÉ   r%   r%   r&   Úcif_wo_hiddenâ  s   
ýrö   rì   )r   ÚloggingÚnumpyÚnpÚfunasr.registerr   Úfunasr.train_utils.device_funcsr   Ú*funasr.models.transformer.utils.nets_utilsr   Útorch.cuda.ampr   Úregisterr   ÚModuler   r“   r½   ÚjitÚscriptr2   rÀ   rã   rä   r8   rõ   r™   rö   r%   r%   r%   r&   Ú<module>   s2   
 
  
D84
+*