o
    i                     @   s<   d Z ddlZddlmZmZ ddlZG dd dejjZdS )z>Multi-Head attention layers with relative positional encoding.    N)OptionalTuplec                       sN  e Zd ZdZ		d"dededededd	f
 fd
dZd#dej	dedej	fddZ
	d#dej	dej	dej	dedej	f
ddZ	d#dej	dej	dej	dedej	f
ddZdej	dej	dej	deej	ej	ej	f fddZ		d$dej	dej	dej	deej	 dej	f
ddZ			d%dej	dej	dej	dej	dej	deej	 dedej	fd d!Z  ZS )&RelPositionMultiHeadedAttentionzRelPositionMultiHeadedAttention definition.

    Args:
        num_heads: Number of attention heads.
        embed_size: Embedding size.
        dropout_rate: Dropout rate.

            F	num_heads
embed_sizedropout_ratesimplified_attention_scorereturnNc                    s  t    || | _|| _| j| |ksJ d||fftj||| _tj||| _tj||| _	tj||| _
|rKtj||| _| j| _n6tjj||dd| _tjt|| j| _tjt|| j| _tjj| j tjj| j | j| _tjj|d| _d| _dS )z)Construct an MultiHeadedAttention object.z3embed_size (%d) must be divisible by num_heads (%d)F)bias)pN)super__init__d_kr   torchnnLinearlinear_qlinear_klinear_v
linear_out
linear_pos"compute_simplified_attention_scorecompute_att_score	ParameterTensor
pos_bias_u
pos_bias_vinitxavier_uniform_compute_attention_scoreDropoutdropoutattn)selfr   r   r   r	   	__class__ d/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/asr_transducer/encoder/modules/attention.pyr      s,   



z(RelPositionMultiHeadedAttention.__init__r   xleft_contextc                 C   sR   |j \}}}}|| }| \}}	}
}|j||||f||	|
| |f||d  dS )zCompute relative positional encoding.

        Args:
            x: Input sequence. (B, H, T_1, 2 * T_1 - 1)
            left_context: Number of frames in left context.

        Returns:
            x: Output sequence. (B, H, T_1, T_2)

           )storage_offset)shapestride
as_strided)r$   r)   r*   
batch_sizen_headstime1ntime2batch_striden_heads_stridetime1_striden_strider'   r'   r(   	rel_shift<   s   

z)RelPositionMultiHeadedAttention.rel_shiftquerykeypos_encc                 C   s`   |  |}t||dd}| j|ddddd|dd|d}|| t	| j
 S )a  Simplified attention score computation.

        Reference: https://github.com/k2-fsa/icefall/pull/458

        Args:
            query: Transformed query tensor. (B, H, T_1, d_k)
            key: Transformed key tensor. (B, H, T_2, d_k)
            pos_enc: Positional embedding tensor. (B, 2 * T_1 - 1, size)
            left_context: Number of frames in left context.

        Returns:
            : Attention score. (B, H, T_1, T_2)

              r+   r*   )r   r   matmul	transposer9   	unsqueezerepeatsizemathsqrtr   )r$   r:   r;   r<   r*   	matrix_ac	matrix_bdr'   r'   r(   r   R   s   
"zBRelPositionMultiHeadedAttention.compute_simplified_attention_scorec           
   	   C   s   |  ||dd| j| j}|dd}|| j dd}|| j dd}t	||dd}t	||
dddd}	| j|	|d}	||	 t| j S )at  Attention score computation.

        Args:
            query: Transformed query tensor. (B, H, T_1, d_k)
            key: Transformed key tensor. (B, H, T_2, d_k)
            pos_enc: Positional embedding tensor. (B, 2 * T_1 - 1, size)
            left_context: Number of frames in left context.

        Returns:
            : Attention score. (B, H, T_1, T_2)

        r   r+   r=   r>   r?   )r   viewrD   r   r   rA   r   r   r   r@   permuter9   rE   rF   )
r$   r:   r;   r<   r*   r   q_with_bias_uq_with_bias_vrG   rH   r'   r'   r(   r    r   s    z7RelPositionMultiHeadedAttention.compute_attention_scorevaluec                 C   sz   | d}| ||d| j| jdd}| ||d| j| jdd}| ||d| j| jdd}|||fS )a~  Transform query, key and value.

        Args:
            query: Query tensor. (B, T_1, size)
            key: Key tensor. (B, T_2, size)
            v: Value tensor. (B, T_2, size)

        Returns:
            q: Transformed query tensor. (B, H, T_1, d_k)
            k: Transformed key tensor. (B, H, T_2, d_k)
            v: Transformed value tensor. (B, H, T_2, d_k)

        r   rI   r+   r=   )rD   r   rK   r   r   rA   r   r   )r$   r:   r;   rO   n_batchqkvr'   r'   r(   forward_qkv   s   

z+RelPositionMultiHeadedAttention.forward_qkvscoresmask
chunk_maskc                 C   s   | d}|dd}|dur|dd|@ }||td}tj|dd|d| _| | j}t||}| 	|
dd |d| j| j }|S )	ai  Compute attention context vector.

        Args:
            value: Transformed value. (B, H, T_2, d_k)
            scores: Attention score. (B, H, T_1, T_2)
            mask: Source mask. (B, T_2)
            chunk_mask: Chunk mask. (T_1, T_1)

        Returns:
           attn_output: Transformed value weighted by attention score. (B, T_1, H * d_k)

        r   r+   r=   Nz-infrI   )dimr   )rD   rB   masked_fillfloatr   softmaxr#   r"   r@   r   rA   
contiguousrK   r   r   )r$   rO   rU   rV   rW   r0   attn_outputr'   r'   r(   forward_attention   s   

z1RelPositionMultiHeadedAttention.forward_attentionc                 C   s8   |  |||\}}	}
| j||	||d}| j|
|||dS )a  Compute scaled dot product attention with rel. positional encoding.

        Args:
            query: Query tensor. (B, T_1, size)
            key: Key tensor. (B, T_2, size)
            value: Value tensor. (B, T_2, size)
            pos_enc: Positional embedding tensor. (B, 2 * T_1 - 1, size)
            mask: Source mask. (B, T_2)
            chunk_mask: Chunk mask. (T_1, T_1)
            left_context: Number of frames in left context.

        Returns:
            : Output tensor. (B, T_1, H * d_k)

        r?   )rW   )rT   r   r^   )r$   r:   r;   rO   r<   rV   rW   r*   rQ   rR   rS   rU   r'   r'   r(   forward   s   z'RelPositionMultiHeadedAttention.forward)r   F)r   )N)Nr   )__name__
__module____qualname____doc__intrZ   boolr   r   r   r9   r   r    r   rT   r   r^   r_   __classcell__r'   r'   r%   r(   r   	   s    )
%
 
)
.	r   )	rc   rE   typingr   r   r   r   Moduler   r'   r'   r'   r(   <module>   s
    