"""Multi-Head Attention Return Weight layer definition."""

import math

import torch
from torch import nn


class MultiHeadedAttentionReturnWeight(nn.Module):
    """Multi-Head Attention layer.

    Args:
        n_head (int): The number of heads.
        n_feat (int): The number of features.
        dropout_rate (float): Dropout rate.

    """

    def __init__(self, n_head, n_feat, dropout_rate):
        """Construct a MultiHeadedAttentionReturnWeight object."""
        super(MultiHeadedAttentionReturnWeight, self).__init__()
        assert n_feat % n_head == 0
        # We assume d_v always equals d_k.
        self.d_k = n_feat // n_head
        self.h = n_head
        self.linear_q = nn.Linear(n_feat, n_feat)
        self.linear_k = nn.Linear(n_feat, n_feat)
        self.linear_v = nn.Linear(n_feat, n_feat)
        self.linear_out = nn.Linear(n_feat, n_feat)
        self.attn = None
        self.dropout = nn.Dropout(p=dropout_rate)

    def forward_qkv(self, query, key, value):
        """Transform query, key and value.

        Args:
            query (torch.Tensor): Query tensor (#batch, time1, size).
            key (torch.Tensor): Key tensor (#batch, time2, size).
            value (torch.Tensor): Value tensor (#batch, time2, size).

        Returns:
            torch.Tensor: Transformed query tensor (#batch, n_head, time1, d_k).
            torch.Tensor: Transformed key tensor (#batch, n_head, time2, d_k).
            torch.Tensor: Transformed value tensor (#batch, n_head, time2, d_k).

        """
        n_batch = query.size(0)
        q = self.linear_q(query).view(n_batch, -1, self.h, self.d_k)
        k = self.linear_k(key).view(n_batch, -1, self.h, self.d_k)
        v = self.linear_v(value).view(n_batch, -1, self.h, self.d_k)
        q = q.transpose(1, 2)  # (batch, head, time1, d_k)
        k = k.transpose(1, 2)  # (batch, head, time2, d_k)
        v = v.transpose(1, 2)  # (batch, head, time2, d_k)

        return q, k, v

    def forward_attention(self, value, scores, mask):
        """Compute attention context vector.

        Args:
            value (torch.Tensor): Transformed value (#batch, n_head, time2, d_k).
            scores (torch.Tensor): Attention score (#batch, n_head, time1, time2).
            mask (torch.Tensor): Mask (#batch, 1, time2) or (#batch, time1, time2).

        Returns:
            torch.Tensor: Transformed value (#batch, time1, d_model)
                weighted by the attention score (#batch, time1, time2).

        """
        n_batch = value.size(0)
        if mask is not None:
            mask = mask.unsqueeze(1).eq(0)  # (batch, 1, *, time2)
            min_value = torch.finfo(scores.dtype).min
            scores = scores.masked_fill(mask, min_value)
            # Zero out masked positions again after the softmax.
            self.attn = torch.softmax(scores, dim=-1).masked_fill(mask, 0.0)
        else:
            self.attn = torch.softmax(scores, dim=-1)  # (batch, head, time1, time2)

        p_attn = self.dropout(self.attn)
        x = torch.matmul(p_attn, value)  # (batch, head, time1, d_k)
        x = x.transpose(1, 2).contiguous().view(n_batch, -1, self.h * self.d_k)

        return self.linear_out(x), self.attn  # (batch, time1, d_model)

    def forward(self, query, key, value, mask):
        """Compute scaled dot product attention.

        Args:
            query (torch.Tensor): Query tensor (#batch, time1, size).
            key (torch.Tensor): Key tensor (#batch, time2, size).
            value (torch.Tensor): Value tensor (#batch, time2, size).
            mask (torch.Tensor): Mask tensor (#batch, 1, time2) or
                (#batch, time1, time2).

        Returns:
            torch.Tensor: Output tensor (#batch, time1, d_model).

        """
        q, k, v = self.forward_qkv(query, key, value)
        scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k)
        return self.forward_attention(v, scores, mask)
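

# A minimal usage sketch, assuming illustrative hyperparameters (n_feat=64,
# n_head=4) that are not prescribed by this module; it only exercises the
# tensor shapes documented in the docstrings above.
if __name__ == "__main__":
    batch, time1, time2, n_feat, n_head = 2, 5, 7, 64, 4
    attn_layer = MultiHeadedAttentionReturnWeight(n_head, n_feat, dropout_rate=0.1)

    query = torch.randn(batch, time1, n_feat)
    key = torch.randn(batch, time2, n_feat)
    value = torch.randn(batch, time2, n_feat)
    mask = torch.ones(batch, 1, time2, dtype=torch.bool)  # no positions masked

    out, weights = attn_layer(query, key, value, mask)
    print(out.shape)      # torch.Size([2, 5, 64])   -> (#batch, time1, n_feat)
    print(weights.shape)  # torch.Size([2, 4, 5, 7]) -> (#batch, n_head, time1, time2)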