"""Class Declaration of Transformer's Attention."""

import chainer
import chainer.functions as F
import chainer.links as L
import numpy as np

# Most negative float32 value, used to blank out masked attention scores.
MIN_VALUE = float(np.finfo(np.float32).min)


class MultiHeadAttention(chainer.Chain):
    """Multi Head Attention Layer.

    Args:
        n_units (int): Number of input units.
        h (int): Number of attention heads.
        dropout (float): Dropout rate.
        initialW: Initializer to initialize the weight.
        initial_bias: Initializer to initialize the bias.

    """

    def __init__(self, n_units, h=8, dropout=0.1, initialW=None, initial_bias=None):
        """Initialize MultiHeadAttention."""
        super(MultiHeadAttention, self).__init__()
        assert n_units % h == 0
        stvd = 1.0 / np.sqrt(n_units)
        with self.init_scope():
            # Query, key, value, and output projections, all n_units -> n_units.
            self.linear_q = L.Linear(
                n_units,
                n_units,
                initialW=initialW(scale=stvd),
                initial_bias=initial_bias(scale=stvd),
            )
            self.linear_k = L.Linear(
                n_units,
                n_units,
                initialW=initialW(scale=stvd),
                initial_bias=initial_bias(scale=stvd),
            )
            self.linear_v = L.Linear(
                n_units,
                n_units,
                initialW=initialW(scale=stvd),
                initial_bias=initial_bias(scale=stvd),
            )
            self.linear_out = L.Linear(
                n_units,
                n_units,
                initialW=initialW(scale=stvd),
                initial_bias=initial_bias(scale=stvd),
            )
        self.d_k = n_units // h
        self.h = h
        self.dropout = dropout
        self.attn = None

    def forward(self, e_var, s_var=None, mask=None, batch=1):
        """Core function of the Multi-head attention layer.

        Args:
            e_var (chainer.Variable): Variable of input array.
            s_var (chainer.Variable): Variable of source array from encoder.
            mask (chainer.Variable): Attention mask.
            batch (int): Batch size.

        Returns:
            chainer.Variable: Output of multi-head attention layer.

        """
        xp = self.xp
        if s_var is None:
            # Self-attention: queries, keys, and values all come from e_var.
            Q = self.linear_q(e_var).reshape(batch, -1, self.h, self.d_k)
            K = self.linear_k(e_var).reshape(batch, -1, self.h, self.d_k)
            V = self.linear_v(e_var).reshape(batch, -1, self.h, self.d_k)
        else:
            # Source attention: keys and values come from the encoder output.
            Q = self.linear_q(e_var).reshape(batch, -1, self.h, self.d_k)
            K = self.linear_k(s_var).reshape(batch, -1, self.h, self.d_k)
            V = self.linear_v(s_var).reshape(batch, -1, self.h, self.d_k)
        # Scaled dot-product: (batch, h, time1, d_k) x (batch, h, d_k, time2).
        scores = F.matmul(F.swapaxes(Q, 1, 2), K.transpose(0, 2, 3, 1)) / np.sqrt(
            self.d_k
        )
        if mask is not None:
            # Repeat the mask over every head and blank out the masked scores.
            mask = xp.stack([mask] * self.h, axis=1)
            scores = F.where(mask, scores, xp.full(scores.shape, MIN_VALUE, "f"))
        self.attn = F.softmax(scores, axis=-1)
        p_attn = F.dropout(self.attn, self.dropout)
        x = F.matmul(p_attn, F.swapaxes(V, 1, 2))
        # Concatenate the heads back into (batch * time1, n_units).
        x = F.swapaxes(x, 1, 2).reshape(-1, self.h * self.d_k)
        return self.linear_out(x)