o
    ´©iƒ  ã                   @   sZ  d Z ddlZddlZddlZddlmZ ddlmZmZ ddlm  m	Z
 ddlmZ ddlm  m  mZ dd„ Zedd	„ ej d
¡dd… D ƒƒZedkrZddlZej d¡ G dd„ dejƒZG dd„ dejƒZG dd„ dejƒZG dd„ dejƒZG dd„ dejƒZG dd„ dejƒZG dd„ dejƒZG dd„ dejƒZ G dd„ dejƒZ!dS )z&Multi-Head Attention layer definition.é    N)Únn)ÚOptionalÚTuple)Úmake_pad_maskc                 C   sf   | | } |   dd¡} |d u r|| ƒ} | |fS tj|| fdd} | d d …d d …|d  d …f }| |fS )Né   é   ©Údim)Ú	transposeÚtorchÚcat)ÚxÚmaskÚcacheÚpad_fnÚkernel_size© r   úP/home/ubuntu/.local/lib/python3.10/site-packages/funasr/models/sanm/attention.pyÚpreprocess_for_attn   s   þ r   c                 C   s   g | ]}t |ƒ‘qS r   )Úint)Ú.0Úir   r   r   Ú
<listcomp>    s    r   Ú.r   )r   é   c                       s8   e Zd ZdZ‡ fdd„Zdd„ Zdd„ Zdd	„ Z‡  ZS )
ÚMultiHeadedAttentionú±Multi-Head Attention layer.

    Args:
        n_head (int): The number of heads.
        n_feat (int): The number of features.
        dropout_rate (float): Dropout rate.

    c                    s~   t t| ƒ ¡  || dksJ ‚|| | _|| _t ||¡| _t ||¡| _t ||¡| _	t ||¡| _
d| _tj|d| _dS )ú)Construct an MultiHeadedAttention object.r   N©Úp)Úsuperr   Ú__init__Úd_kÚhr   ÚLinearÚlinear_qÚlinear_kÚlinear_vÚ
linear_outÚattnÚDropoutÚdropout)ÚselfÚn_headÚn_featÚdropout_rate©Ú	__class__r   r   r!   1   s   
zMultiHeadedAttention.__init__c                 C   s†   |  d¡}|  |¡ |d| j| j¡}|  |¡ |d| j| j¡}|  |¡ |d| j| j¡}| dd¡}| dd¡}| dd¡}|||fS )á	  Transform query, key and value.

        Args:
            query (torch.Tensor): Query tensor (#batch, time1, size).
            key (torch.Tensor): Key tensor (#batch, time2, size).
            value (torch.Tensor): Value tensor (#batch, time2, size).

        Returns:
            torch.Tensor: Transformed query tensor (#batch, n_head, time1, d_k).
            torch.Tensor: Transformed key tensor (#batch, n_head, time2, d_k).
            torch.Tensor: Transformed value tensor (#batch, n_head, time2, d_k).

        r   éÿÿÿÿr   r   )Úsizer%   Úviewr#   r"   r&   r'   r
   )r,   ÚqueryÚkeyÚvalueÚn_batchÚqÚkÚvr   r   r   Úforward_qkv?   s   

z MultiHeadedAttention.forward_qkvc           	      C   s    |  d¡}|dur(| d¡ d¡}tdƒ }| ||¡}tj|dd |d¡}ntj|dd}|  |¡}t ||¡}| 	dd¡ 
¡  |d| j| j ¡}|  |¡S ©	aÒ  Compute attention context vector.

        Args:
            value (torch.Tensor): Transformed value (#batch, n_head, time2, d_k).
            scores (torch.Tensor): Attention score (#batch, n_head, time1, time2).
            mask (torch.Tensor): Mask (#batch, 1, time2) or (#batch, time1, time2).

        Returns:
            torch.Tensor: Transformed value (#batch, time1, d_model)
                weighted by the attention score (#batch, time1, time2).

        r   Nr   Úinfr3   r   ç        r   ©r4   Ú	unsqueezeÚeqÚfloatÚmasked_fillr   Úsoftmaxr+   Úmatmulr
   Ú
contiguousr5   r#   r"   r(   )	r,   r8   Úscoresr   r9   Ú	min_valuer)   Úp_attnr   r   r   r   Úforward_attentionW   s    
ÿÿ
 ÿ
z&MultiHeadedAttention.forward_attentionc           	      C   sB   |   |||¡\}}}t || dd¡¡t | j¡ }|  |||¡S )áË  Compute scaled dot product attention.

        Args:
            query (torch.Tensor): Query tensor (#batch, time1, size).
            key (torch.Tensor): Key tensor (#batch, time2, size).
            value (torch.Tensor): Value tensor (#batch, time2, size).
            mask (torch.Tensor): Mask tensor (#batch, 1, time2) or
                (#batch, time1, time2).

        Returns:
            torch.Tensor: Output tensor (#batch, time1, d_model).

        éþÿÿÿr3   ©r=   r   rG   r
   ÚmathÚsqrtr"   rL   )	r,   r6   r7   r8   r   r:   r;   r<   rI   r   r   r   Úforwardy   s    zMultiHeadedAttention.forward©	Ú__name__Ú
__module__Ú__qualname__Ú__doc__r!   r=   rL   rR   Ú__classcell__r   r   r0   r   r   '   s    	"r   c                       s\   e Zd ZdZ					d‡ fdd„	Zdd	d
„Zdd„ Zddd„Zddd„Zddd„Z	‡  Z
S )ÚMultiHeadedAttentionSANMr   r   Nr   é   çš™™™™™¹?c              	      s>  t ƒ  ¡  || dksJ ‚|| | _|| _|durZd|v r)tj||||	|
d| _nt ||¡| _d|v d|v d|v g}|g d¢krKt ||d	 ¡| _ntj	||d	 ||	|
|d
| _nt ||¡| _t ||d	 ¡| _d}tj
|d| _tj|||dd|dd| _|d d }|dkrŽ|| }|d | }t ||fd¡| _dS )r   r   NÚo©ÚrÚ
lora_alphaÚlora_dropoutr:   r;   r<   )FFFé   ©r^   r_   r`   Úenable_lorar   r   F©ÚstrideÚpaddingÚgroupsÚbiasr   r@   )r    r!   r"   r#   Úlorar$   r(   r   Úlinear_q_k_vÚMergedLinearr*   r+   ÚConv1dÚ
fsmn_blockÚConstantPad1dr   )r,   r-   Úin_featr.   r/   r   Ú
sanm_shfitÚ	lora_listÚ	lora_rankr_   r`   Úlora_qkv_listr)   Úleft_paddingÚright_paddingr0   r   r   r!   –   sB   



ÿ
ú	ÿz!MultiHeadedAttentionSANM.__init__c                 C   s’   |  ¡ \}}}|d ur t ||ddf¡}|d ur|| }|| }| dd¡}|  |¡}|  |¡}| dd¡}||7 }|  |¡}|d urG|| }|S )Nr3   r   r   )r4   r   Úreshaper
   r   rm   r+   )r,   Úinputsr   Úmask_shfit_chunkÚbÚtÚdr   r   r   r   Úforward_fsmnÏ   s   


z%MultiHeadedAttentionSANM.forward_fsmnc                 C   ó¦   |  ¡ \}}}|  |¡}tj|t| j| j ƒdd\}}}t |||| j| jf¡ dd¡}	t |||| j| jf¡ dd¡}
t |||| j| jf¡ dd¡}|	|
||fS ©r2   r3   r   r   r   ©	r4   rj   r   Úsplitr   r#   r"   rv   r
   ©r,   r   ry   rz   r{   Úq_k_vr:   r;   r<   Úq_hÚk_hÚv_hr   r   r   r=   á   ó   
"ÿÿÿz$MultiHeadedAttentionSANM.forward_qkvc           
      C   ó°   |  d¡}|dur0|dur|| }| d¡ d¡}tdƒ }| ||¡}tj|dd |d¡}ntj|dd}|  |¡}t ||¡}	|	 	dd¡ 
¡  |d| j| j ¡}	|  |	¡S r>   rA   ©
r,   r8   rI   r   Úmask_att_chunk_encoderr9   rJ   r)   rK   r   r   r   r   rL   þ   ó$   
ÿÿ
 ÿ
z*MultiHeadedAttentionSANM.forward_attentionc                 C   sZ   |   |¡\}}}}|  |||¡}	|| jd  }t || dd¡¡}
|  ||
||¡}||	 S ©rM   ç      à¿rN   r3   ©r=   r|   r"   r   rG   r
   rL   )r,   r   r   rx   r‰   rƒ   r„   r…   r<   Úfsmn_memoryrI   Úatt_outsr   r   r   rR   $  s   z MultiHeadedAttentionSANM.forwardc                 C   sæ  |   |¡\}}}}|dur|dks|dkrÏ|dur¦|dd…dd…d|d  …dd…f }	|dd…dd…d|d  …dd…f }
tj|d |fdd}tj|d |fdd}tj|d |	fdd|d< tj|d |
fdd|d< |dkr¥|d dd…dd…||d   d…dd…f |d< |d dd…dd…||d   d…dd…f |d< n)|dd…dd…d|d  …dd…f |dd…dd…d|d  …dd…f d	œ}|}|  |d¡}|| jd
  }t || dd¡¡}|  ||d¡}|| |fS )rM   Nr   r3   r   r;   r   r<   r   ©r;   r<   rŒ   rN   )r=   r   r   r|   r"   rG   r
   rL   )r,   r   r   Ú
chunk_sizeÚ	look_backrƒ   r„   r…   r<   Ú
k_h_strideÚ
v_h_strideÚ	cache_tmprŽ   rI   r   r   r   r   Úforward_chunk9  s,   &&22€$$þz&MultiHeadedAttentionSANM.forward_chunk)r   Nr   rZ   r[   ©N©NN©NNr   )rT   rU   rV   rW   r!   r|   r=   rL   rR   r–   rX   r   r   r0   r   rY   Œ   s    õ
9

&rY   c                       óR   e Zd Z‡ fdd„Zdd„ Zdejdejfdd„Zd	d
„ Zdd„ Z	dd„ Z
‡  ZS )ÚMultiHeadedAttentionSANMExportc                    óR   t ƒ  ¡  |j| _|j| _|j| _|j| _|j| _|j| _d | _| j| j | _	d S r—   ©
r    r!   r"   r#   r(   rj   rm   r   r)   Úall_head_size©r,   Úmodelr0   r   r   r!   b  ó   
ú'MultiHeadedAttentionSANMExport.__init__c                 C   ó^   |\}}|   |¡\}}}}|  ||¡}	|| jd  }t || dd¡¡}
|  ||
|¡}||	 S ©NrŒ   rN   r3   r   ©r,   r   r   Úmask_3d_btdÚmask_4d_bhltrƒ   r„   r…   r<   rŽ   rI   r   r   r   r   rR   n  ó   ú&MultiHeadedAttentionSANMExport.forwardr   Úreturnc                 C   ó6   |  ¡ d d… | j| jf }| |¡}| dddd¡S ©Nr3   r   r   r   ra   ©r4   r#   r"   r5   Úpermute©r,   r   Únew_x_shaper   r   r   Útranspose_for_scoresw  ó   
ú3MultiHeadedAttentionSANMExport.transpose_for_scoresc           	      C   óV   |   |¡}tj|t| j| j ƒdd\}}}|  |¡}|  |¡}|  |¡}||||fS ©Nr3   r   ©rj   r   r€   r   r#   r"   r±   ©	r,   r   r‚   r:   r;   r<   rƒ   r„   r…   r   r   r   r=   |  ó   
"


ú*MultiHeadedAttentionSANMExport.forward_qkvc                 C   óH   || }|  dd¡}|  |¡}|  |¡}|  dd¡}|| }|| }|S ©Nr   r   ©r
   r   rm   ©r,   rw   r   r   r   r   r   r|   „  ó   

ú+MultiHeadedAttentionSANMExport.forward_fsmnc                 C   ób   || }t j|dd}t  ||¡}| dddd¡ ¡ }| ¡ d d… | jf }| |¡}|  |¡S ©Nr3   r   r   r   r   ra   rN   ©	r   rF   rG   r®   rH   r4   rž   r5   r(   ©r,   r8   rI   r   r)   Úcontext_layerÚnew_context_layer_shaper   r   r   rL     ó   

ú0MultiHeadedAttentionSANMExport.forward_attention©rT   rU   rV   r!   rR   r   ÚTensorr±   r=   r|   rL   rX   r   r   r0   r   r›   a  ó    	r›   c                       rš   )r›   c                    rœ   r—   r   rŸ   r0   r   r   r!     r¡   r¢   c                 C   r£   r¤   r   r¥   r   r   r   rR   ©  r¨   r©   r   rª   c                 C   r«   r¬   r­   r¯   r   r   r   r±   ²  r²   r³   c           	      C   r´   rµ   r¶   r·   r   r   r   r=   ·  r¸   r¹   c                 C   rº   r»   r¼   r½   r   r   r   r|   ¿  r¾   r¿   c                 C   rÀ   rÁ   rÂ   rÃ   r   r   r   rL   Ë  rÆ   rÇ   rÈ   r   r   r0   r   r›   œ  rÊ   c                       s,   e Zd ZdZd‡ fdd„	Zd	dd„Z‡  ZS )
ÚMultiHeadedAttentionSANMDecoderr   r   c              	      sv   t ƒ  ¡  tj|d| _tj|||dd|dd| _|d d }|dkr'|| }|d | }t ||fd¡| _|| _	dS )	r   r   r   r   Frd   r   r@   N)
r    r!   r   r*   r+   rl   rm   rn   r   r   )r,   r.   r/   r   rp   rt   ru   r0   r   r   r!   á  s   
ÿ
z(MultiHeadedAttentionSANMDecoder.__init__Nc           	      C   s4  |  ¡ \}}}|dur t ||ddf¡}|dur|| }|| }| dd¡}|  ¡ \}}}|du r<|  |¡}| js;|}n)tj|dd…dd…dd…f |fdd}|dd…dd…| j| d  d…f }|}|  |¡}| dd¡}|  d¡|  d¡kr…|dd…ddd…f }|| }|  	|¡}|dur–|| }||fS )zv
        :param x: (#batch, time1, size).
        :param mask: Mask tensor (#batch, 1, time)
        :return:
        Nr3   r   r   r   )
r4   r   rv   r
   r   Útrainingr   r   rm   r+   )	r,   rw   r   r   rx   ry   rz   r{   r   r   r   r   rR   ó  s2   
€(&

z'MultiHeadedAttentionSANMDecoder.forward)r   r˜   )rT   rU   rV   rW   r!   rR   rX   r   r   r0   r   rË   ×  s    	rË   c                       s&   e Zd Z‡ fdd„Zddd„Z‡  ZS )Ú%MultiHeadedAttentionSANMDecoderExportc                    s,   t ƒ  ¡  |j| _|j| _|j| _d | _d S r—   )r    r!   rm   r   r   r)   rŸ   r0   r   r   r!   '  s
   

z.MultiHeadedAttentionSANMDecoderExport.__init__Nc                 C   sF   t |||| j| jƒ\}}|  |¡}| dd¡}|| }|| }||fS r»   )r   r   r   rm   r
   )r,   rw   r   r   r   r   r   r   rR   .  s   
z-MultiHeadedAttentionSANMDecoderExport.forwardr—   )rT   rU   rV   r!   rR   rX   r   r   r0   r   rÍ   &  s    rÍ   c                       sR   e Zd ZdZ					d‡ fdd„	Zdd	„ Zddd„Zddd„Zddd„Z‡  Z	S )ÚMultiHeadedAttentionCrossAttr   Nr   rZ   r[   c	           
         sF  t ƒ  ¡  || dksJ ‚|| | _|| _|durzd|v r)tj|||||d| _nt ||¡| _d|v d|v g}	|	ddgkrNt |du rF|n||d ¡| _ntj	|du rV|n||d ||||	d	| _d
|v rrtj|||||d| _
n%t ||¡| _
nt ||¡| _t |du r‰|n||d ¡| _t ||¡| _
d| _tj|d| _dS )r   r   Nr:   r]   r;   r<   Fr   rb   r\   r   )r    r!   r"   r#   ri   r$   r%   r   Ú
linear_k_vrk   r(   r)   r*   r+   )
r,   r-   r.   r/   rq   rr   r_   r`   Úencoder_output_sizeÚlora_kv_listr0   r   r   r!   B  sF   



ÿÿú

ÿÿz%MultiHeadedAttentionCrossAtt.__init__c                 C   s¨   |  d¡}|  |¡}t ||d| j| jf¡ dd¡}|  |¡}tj|t	| j| j ƒdd\}}t ||d| j| jf¡ dd¡}	t ||d| j| jf¡ dd¡}
||	|
fS )r2   r   r3   r   r   r   )
r4   r%   r   rv   r#   r"   r
   rÏ   r€   r   )r,   r   Úmemoryry   r:   rƒ   Úk_vr;   r<   r„   r…   r   r   r   r=   w  s   

ÿ
 ÿÿ
z(MultiHeadedAttentionCrossAtt.forward_qkvFc           
      C   s²   |  d¡}|dur(| d¡ d¡}tdƒ }| ||¡}tj|dd |d¡}ntj|dd}|  |¡}t ||¡}	|	 	dd¡ 
¡  |d| j| j ¡}	|rT|  |	¡|fS |  |	¡S r>   rA   )
r,   r8   rI   r   Úret_attnr9   rJ   r)   rK   r   r   r   r   rL   ˜  s$   
ÿÿ
 ÿ
z.MultiHeadedAttentionCrossAtt.forward_attentionc           	      C   sF   |   ||¡\}}}|| jd  }t || dd¡¡}| j||||dS )rM   rŒ   rN   r3   )rÔ   ©r=   r"   r   rG   r
   rL   )	r,   r   rÒ   Úmemory_maskrÔ   rƒ   r„   r…   rI   r   r   r   rR   ¼  s   z$MultiHeadedAttentionCrossAtt.forwardr   c                 C   sB  |   ||¡\}}}|dur‡|dkr‡|durZtj|d |fdd}tj|d |fdd}|dd…dd…||d   d…dd…f |d< |dd…dd…||d   d…dd…f |d< n-|dd…dd…||d   d…dd…f |dd…dd…||d   d…dd…f dœ}	|	}|| jd	  }t || d
d¡¡}
|  ||
d¡|fS )rM   Nr   r;   r   r   r<   r   r   rŒ   rN   r3   )r=   r   r   r"   rG   r
   rL   )r,   r   rÒ   r   r‘   r’   rƒ   r„   r…   r•   rI   r   r   r   r–   Ï  s   .0((þz*MultiHeadedAttentionCrossAtt.forward_chunk)Nr   rZ   r[   N©Fr™   )
rT   rU   rV   rW   r!   r=   rL   rR   r–   rX   r   r   r0   r   rÎ   8  s    ÷5
!
$rÎ   c                       sL   e Zd Z‡ fdd„Zddd„Zdejdejfdd	„Zd
d„ Zdd„ Z	‡  Z
S )Ú"MultiHeadedAttentionCrossAttExportc                    sJ   t ƒ  ¡  |j| _|j| _|j| _|j| _|j| _d | _| j| j | _d S r—   )	r    r!   r"   r#   r%   rÏ   r(   r)   rž   rŸ   r0   r   r   r!   ð  s   
z+MultiHeadedAttentionCrossAttExport.__init__Fc           	      C   sB   |   ||¡\}}}t || dd¡¡t | j¡ }|  ||||¡S )NrN   r3   rO   )	r,   r   rÒ   rÖ   rÔ   r:   r;   r<   rI   r   r   r   rR   ú  s    z*MultiHeadedAttentionCrossAttExport.forwardr   rª   c                 C   r«   r¬   r­   r¯   r   r   r   r±   ÿ  r²   z7MultiHeadedAttentionCrossAttExport.transpose_for_scoresc                 C   s\   |   |¡}|  |¡}tj|t| j| j ƒdd\}}|  |¡}|  |¡}|  |¡}|||fS rµ   )r%   rÏ   r   r€   r   r#   r"   r±   )r,   r   rÒ   r:   rÓ   r;   r<   r   r   r   r=     s   

 



z.MultiHeadedAttentionCrossAttExport.forward_qkvc                 C   s|   ||  |j¡ }tj|dd}t ||¡}| dddd¡ ¡ }| ¡ d d… | jf }| 	|¡}|r9|  
|¡|fS |  
|¡S rÁ   )ÚtoÚdevicer   rF   rG   r®   rH   r4   rž   r5   r(   )r,   r8   rI   r   rÔ   r)   rÄ   rÅ   r   r   r   rL     s   

z4MultiHeadedAttentionCrossAttExport.forward_attentionr×   )rT   rU   rV   r!   rR   r   rÉ   r±   r=   rL   rX   r   r   r0   r   rØ   ï  s    


rØ   c                       s<   e Zd ZdZ‡ fdd„Zdd„ Zddd„Zdd	d
„Z‡  ZS )ÚMultiHeadSelfAttentionr   c                    sf   t t| ƒ ¡  || dksJ ‚|| | _|| _t ||¡| _t ||d ¡| _d| _	tj
|d| _dS )r   r   ra   Nr   )r    rÛ   r!   r"   r#   r   r$   r(   rj   r)   r*   r+   )r,   r-   ro   r.   r/   r0   r   r   r!   &  s   
zMultiHeadSelfAttention.__init__c                 C   r}   r~   r   r   r   r   r   r=   2  r†   z"MultiHeadSelfAttention.forward_qkvNc           
      C   r‡   r>   rA   rˆ   r   r   r   rL   O  rŠ   z(MultiHeadSelfAttention.forward_attentionc           
      C   sH   |   |¡\}}}}|| jd  }t || dd¡¡}|  ||||¡}	|	S r‹   rÕ   )
r,   r   r   r‰   rƒ   r„   r…   r<   rI   r   r   r   r   rR   u  s
   zMultiHeadSelfAttention.forwardr—   rS   r   r   r0   r   rÛ     s    	
&rÛ   )"rW   rP   Únumpyr   r   Útypingr   r   Útorch.nn.functionalÚ
functionalÚFÚ*funasr.models.transformer.utils.nets_utilsr   Úfunasr.models.lora.layersÚmodelsri   Úlayersr   ÚtupleÚ__version__r€   Útorch_versionÚtorch.fxÚfxÚwrapÚModuler   rY   r›   rË   rÍ   rÎ   rØ   rÛ   r   r   r   r   Ú<module>   s2   "e V;;O 8-