o
    ´©iì/  ã                   @   s¦   d Z ddlZddlZddlZddlmZ ddlmZmZ ddlm  m	Z
 ddlmZ ddlm  m  mZ G dd„ dejƒZG dd	„ d	eƒZG d
d„ dejƒZdS )z&Multi-Head Attention layer definition.é    N)Únn)ÚOptionalÚTuple)Úmake_pad_maskc                       s8   e Zd ZdZ‡ fdd„Zdd„ Zdd„ Zdd	„ Z‡  ZS )
ÚMultiHeadedAttentionú±Multi-Head Attention layer.

    Args:
        n_head (int): The number of heads.
        n_feat (int): The number of features.
        dropout_rate (float): Dropout rate.

    c                    s~   t t| ƒ ¡  || dksJ ‚|| | _|| _t ||¡| _t ||¡| _t ||¡| _	t ||¡| _
d| _tj|d| _dS )ú)Construct an MultiHeadedAttention object.r   N©Úp)Úsuperr   Ú__init__Úd_kÚhr   ÚLinearÚlinear_qÚlinear_kÚlinear_vÚ
linear_outÚattnÚDropoutÚdropout)ÚselfÚn_headÚn_featÚdropout_rate©Ú	__class__© úP/home/ubuntu/.local/lib/python3.10/site-packages/funasr/models/sond/attention.pyr      s   
zMultiHeadedAttention.__init__c                 C   s†   |  d¡}|  |¡ |d| j| j¡}|  |¡ |d| j| j¡}|  |¡ |d| j| j¡}| dd¡}| dd¡}| dd¡}|||fS )á	  Transform query, key and value.

        Args:
            query (torch.Tensor): Query tensor (#batch, time1, size).
            key (torch.Tensor): Key tensor (#batch, time2, size).
            value (torch.Tensor): Value tensor (#batch, time2, size).

        Returns:
            torch.Tensor: Transformed query tensor (#batch, n_head, time1, d_k).
            torch.Tensor: Transformed key tensor (#batch, n_head, time2, d_k).
            torch.Tensor: Transformed value tensor (#batch, n_head, time2, d_k).

        r   éÿÿÿÿé   é   )Úsizer   Úviewr   r   r   r   Ú	transpose)r   ÚqueryÚkeyÚvalueÚn_batchÚqÚkÚvr   r   r   Úforward_qkv-   s   

z MultiHeadedAttention.forward_qkvc           	      C   s¸   |  d¡}|dur4| d¡ d¡}tt tjd|jd ¡ j¡j	ƒ}| 
||¡}tj|dd 
|d¡}ntj|dd}|  |¡}t ||¡}| dd¡ ¡  |d| j| j ¡}|  |¡S ©	aÒ  Compute attention context vector.

        Args:
            value (torch.Tensor): Transformed value (#batch, n_head, time2, d_k).
            scores (torch.Tensor): Attention score (#batch, n_head, time1, time2).
            mask (torch.Tensor): Mask (#batch, 1, time2) or (#batch, time1, time2).

        Returns:
            torch.Tensor: Transformed value (#batch, time1, d_model)
                weighted by the attention score (#batch, time1, time2).

        r   Nr!   )Údtyper    ©Údimg        r"   ©r#   Ú	unsqueezeÚeqÚfloatÚnumpyÚfinfoÚtorchÚtensorr/   ÚminÚmasked_fillÚsoftmaxr   Úmatmulr%   Ú
contiguousr$   r   r   r   )	r   r(   ÚscoresÚmaskr)   Ú	min_valuer   Úp_attnÚxr   r   r   Úforward_attentionE   s   
"ÿ
 ÿ
z&MultiHeadedAttention.forward_attentionc           	      C   sB   |   |||¡\}}}t || dd¡¡t | j¡ }|  |||¡S )áË  Compute scaled dot product attention.

        Args:
            query (torch.Tensor): Query tensor (#batch, time1, size).
            key (torch.Tensor): Key tensor (#batch, time2, size).
            value (torch.Tensor): Value tensor (#batch, time2, size).
            mask (torch.Tensor): Mask tensor (#batch, 1, time2) or
                (#batch, time1, time2).

        Returns:
            torch.Tensor: Output tensor (#batch, time1, d_model).

        éþÿÿÿr    )r-   r8   r=   r%   ÚmathÚsqrtr   rD   )	r   r&   r'   r(   r@   r*   r+   r,   r?   r   r   r   Úforwarde   s    zMultiHeadedAttention.forward©	Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r-   rD   rI   Ú__classcell__r   r   r   r   r      s    	 r   c                       s2   e Zd ZdZd	‡ fdd„	Zdd„ Zdd„ Z‡  ZS )
ÚRelPositionMultiHeadedAttentionaµ  Multi-Head Attention layer with relative position encoding (new implementation).

    Details can be found in https://github.com/espnet/espnet/pull/2816.

    Paper: https://arxiv.org/abs/1901.02860

    Args:
        n_head (int): The number of heads.
        n_feat (int): The number of features.
        dropout_rate (float): Dropout rate.
        zero_triu (bool): Whether to zero the upper triangular part of attention matrix.

    Fc                    s|   t ƒ  |||¡ || _tj||dd| _t t | j	| j
¡¡| _t t | j	| j
¡¡| _tjj | j¡ tjj | j¡ dS )z4Construct an RelPositionMultiHeadedAttention object.F)ÚbiasN)r   r   Ú	zero_triur   r   Ú
linear_posÚ	Parameterr8   ÚTensorr   r   Ú
pos_bias_uÚ
pos_bias_vÚinitÚxavier_uniform_)r   r   r   r   rR   r   r   r   r   ‡   s   z(RelPositionMultiHeadedAttention.__init__c                 C   s  t jg | ¡ dd… ¢d‘R |j|jd}t j||gdd}|jg | ¡ dd… ¢| d¡d ‘| d¡‘R Ž }|dd…dd…dd…f  |¡dd…dd…dd…d| d¡d d …f }| jrŠt j	| d¡| d¡f|jd}|t  
|| d¡| d¡ ¡dddd…dd…f  }|S )	zùCompute relative positional encoding.

        Args:
            x (torch.Tensor): Input tensor (batch, head, time1, 2*time1-1).
            time1 means the length of query vector.

        Returns:
            torch.Tensor: Output tensor.

        Né   r!   )Údevicer/   r    r0   r"   )r[   )r8   Úzerosr#   r[   r/   Úcatr$   Úview_asrR   ÚonesÚtril)r   rC   Úzero_padÚx_paddedr_   r   r   r   Ú	rel_shift”   s   *4(ÿ 4z)RelPositionMultiHeadedAttention.rel_shiftc                 C   sÈ   |   |||¡\}}}| dd¡}| d¡}	|  |¡ |	d| j| j¡}
|
 dd¡}
|| j  dd¡}|| j  dd¡}t	 
|| dd¡¡}t	 
||
 dd¡¡}|  |¡}|| t | j¡ }|  |||¡S )aV  Compute 'Scaled Dot Product Attention' with rel. positional encoding.

        Args:
            query (torch.Tensor): Query tensor (#batch, time1, size).
            key (torch.Tensor): Key tensor (#batch, time2, size).
            value (torch.Tensor): Value tensor (#batch, time2, size).
            pos_emb (torch.Tensor): Positional embedding tensor
                (#batch, 2*time1-1, size).
            mask (torch.Tensor): Mask tensor (#batch, 1, time2) or
                (#batch, time1, time2).

        Returns:
            torch.Tensor: Output tensor (#batch, time1, d_model).

        r!   r"   r   r    rF   )r-   r%   r#   rS   r$   r   r   rV   rW   r8   r=   rc   rG   rH   rD   )r   r&   r'   r(   Úpos_embr@   r*   r+   r,   Ún_batch_posr
   Úq_with_bias_uÚq_with_bias_vÚ	matrix_acÚ	matrix_bdr?   r   r   r   rI   ­   s   

z'RelPositionMultiHeadedAttention.forward)F)rK   rL   rM   rN   r   rc   rI   rO   r   r   r   r   rP   x   s
    rP   c                       s<   e Zd ZdZ‡ fdd„Zdd„ Zddd„Zdd	d
„Z‡  ZS )ÚMultiHeadSelfAttentionr   c                    sf   t t| ƒ ¡  || dksJ ‚|| | _|| _t ||¡| _t ||d ¡| _d| _	tj
|d| _dS )r   r   rZ   Nr	   )r   rj   r   r   r   r   r   r   Úlinear_q_k_vr   r   r   )r   r   Úin_featr   r   r   r   r   r   ã   s   
zMultiHeadSelfAttention.__init__c                 C   s¦   |  ¡ \}}}|  |¡}tj|t| j| j ƒdd\}}}t |||| j| jf¡ dd¡}	t |||| j| jf¡ dd¡}
t |||| j| jf¡ dd¡}|	|
||fS )r   r    r0   r!   r"   )	r#   rk   r8   ÚsplitÚintr   r   Úreshaper%   )r   rC   ÚbÚtÚdÚq_k_vr*   r+   r,   Úq_hÚk_hÚv_hr   r   r   r-   ï   s   
"ÿÿÿz"MultiHeadSelfAttention.forward_qkvNc           
      C   sÈ   |  d¡}|dur<|dur|| }| d¡ d¡}tt tjd|jd ¡ j¡j	ƒ}| 
||¡}tj|dd 
|d¡}ntj|dd}|  |¡}t ||¡}	|	 dd¡ ¡  |d| j| j ¡}	|  |	¡S r.   r2   )
r   r(   r?   r@   Úmask_att_chunk_encoderr)   rA   r   rB   rC   r   r   r   rD     s    
"ÿ
 ÿ
z(MultiHeadSelfAttention.forward_attentionc           
      C   sH   |   |¡\}}}}|| jd  }t || dd¡¡}|  ||||¡}	|	S )rE   g      à¿rF   r    )r-   r   r8   r=   r%   rD   )
r   rC   r@   rw   rt   ru   rv   r,   r?   Úatt_outsr   r   r   rI   0  s
   zMultiHeadSelfAttention.forward)NrJ   r   r   r   r   rj   Ù   s    	
$rj   )rN   rG   r6   r8   r   Útypingr   r   Útorch.nn.functionalÚ
functionalÚFÚ*funasr.models.transformer.utils.nets_utilsr   Úfunasr.models.lora.layersÚmodelsÚloraÚlayersÚModuler   rP   rj   r   r   r   r   Ú<module>   s   ca