o
    ¡¿¯i  ã                   @   s<   d Z ddlZddlmZmZ ddlZG dd„ dejjƒZdS )z>Multi-Head attention layers with relative positional encoding.é    N)ÚOptionalÚTuplec                       sN  e Zd ZdZ		d"dededededd	f
‡ fd
d„Zd#dej	dedej	fdd„Z
	d#dej	dej	dej	dedej	f
dd„Z	d#dej	dej	dej	dedej	f
dd„Zdej	dej	dej	deej	ej	ej	f fdd„Z		d$dej	dej	dej	deej	 dej	f
dd„Z			d%dej	dej	dej	dej	dej	deej	 dedej	fd d!„Z‡  ZS )&ÚRelPositionMultiHeadedAttentionz²RelPositionMultiHeadedAttention definition.

    Args:
        num_heads: Number of attention heads.
        embed_size: Embedding size.
        dropout_rate: Dropout rate.

    ç        FÚ	num_headsÚ
embed_sizeÚdropout_rateÚsimplified_attention_scoreÚreturnNc                    s  t ƒ  ¡  || | _|| _| j| |ksJ d||ffƒ‚tj ||¡| _tj ||¡| _tj ||¡| _	tj ||¡| _
|rKtj ||¡| _| j| _n6tjj||dd| _tj t || j¡¡| _tj t || j¡¡| _tjj | j¡ tjj | j¡ | j| _tjj|d| _d| _dS )z)Construct an MultiHeadedAttention object.z3embed_size (%d) must be divisible by num_heads (%d)F)Úbias)ÚpN)ÚsuperÚ__init__Úd_kr   ÚtorchÚnnÚLinearÚlinear_qÚlinear_kÚlinear_vÚ
linear_outÚ
linear_posÚ"compute_simplified_attention_scoreÚcompute_att_scoreÚ	ParameterÚTensorÚ
pos_bias_uÚ
pos_bias_vÚinitÚxavier_uniform_Úcompute_attention_scoreÚDropoutÚdropoutÚattn)Úselfr   r   r   r	   ©Ú	__class__© úd/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/asr_transducer/encoder/modules/attention.pyr      s,   

þ

z(RelPositionMultiHeadedAttention.__init__r   ÚxÚleft_contextc                 C   sR   |j \}}}}|| }| ¡ \}}	}
}|j||||f||	|
| |f||d  dS )zõCompute relative positional encoding.

        Args:
            x: Input sequence. (B, H, T_1, 2 * T_1 - 1)
            left_context: Number of frames in left context.

        Returns:
            x: Output sequence. (B, H, T_1, T_2)

        é   )Ústorage_offset)ÚshapeÚstrideÚ
as_strided)r$   r)   r*   Ú
batch_sizeÚn_headsÚtime1ÚnÚtime2Úbatch_strideÚn_heads_strideÚtime1_strideÚn_strider'   r'   r(   Ú	rel_shift<   s   

ýz)RelPositionMultiHeadedAttention.rel_shiftÚqueryÚkeyÚpos_encc                 C   s`   |   |¡}t || dd¡¡}| j| dd¡ d¡ dd| d¡d¡|d}|| t 	| j
¡ S )a¾  Simplified attention score computation.

        Reference: https://github.com/k2-fsa/icefall/pull/458

        Args:
            query: Transformed query tensor. (B, H, T_1, d_k)
            key: Transformed key tensor. (B, H, T_2, d_k)
            pos_enc: Positional embedding tensor. (B, 2 * T_1 - 1, size)
            left_context: Number of frames in left context.

        Returns:
            : Attention score. (B, H, T_1, T_2)

        é   é   r+   ©r*   )r   r   ÚmatmulÚ	transposer9   Ú	unsqueezeÚrepeatÚsizeÚmathÚsqrtr   )r$   r:   r;   r<   r*   Ú	matrix_acÚ	matrix_bdr'   r'   r(   r   R   s   
"þzBRelPositionMultiHeadedAttention.compute_simplified_attention_scorec           
   	   C   sž   |   |¡ | d¡d| j| j¡}| dd¡}|| j  dd¡}|| j  dd¡}t 	|| dd¡¡}t 	|| 
dddd¡¡}	| j|	|d}	||	 t | j¡ S )at  Attention score computation.

        Args:
            query: Transformed query tensor. (B, H, T_1, d_k)
            key: Transformed key tensor. (B, H, T_2, d_k)
            pos_enc: Positional embedding tensor. (B, 2 * T_1 - 1, size)
            left_context: Number of frames in left context.

        Returns:
            : Attention score. (B, H, T_1, T_2)

        r   éÿÿÿÿr+   r=   éþÿÿÿr>   r?   )r   ÚviewrD   r   r   rA   r   r   r   r@   Úpermuter9   rE   rF   )
r$   r:   r;   r<   r*   r   Úq_with_bias_uÚq_with_bias_vrG   rH   r'   r'   r(   r    r   s    z7RelPositionMultiHeadedAttention.compute_attention_scoreÚvaluec                 C   sz   |  d¡}|  |¡ |d| j| j¡ dd¡}|  |¡ |d| j| j¡ dd¡}|  |¡ |d| j| j¡ dd¡}|||fS )a~  Transform query, key and value.

        Args:
            query: Query tensor. (B, T_1, size)
            key: Key tensor. (B, T_2, size)
            v: Value tensor. (B, T_2, size)

        Returns:
            q: Transformed query tensor. (B, H, T_1, d_k)
            k: Transformed key tensor. (B, H, T_2, d_k)
            v: Transformed value tensor. (B, H, T_2, d_k)

        r   rI   r+   r=   )rD   r   rK   r   r   rA   r   r   )r$   r:   r;   rO   Ún_batchÚqÚkÚvr'   r'   r(   Úforward_qkv’   s   
ýýý
z+RelPositionMultiHeadedAttention.forward_qkvÚscoresÚmaskÚ
chunk_maskc                 C   s¢   |  d¡}| d¡ d¡}|dur| d¡ d¡|@ }| |tdƒ¡}tj|dd |d¡| _|  | j¡}t ||¡}|  	| 
dd¡ ¡  |d| j| j ¡¡}|S )	ai  Compute attention context vector.

        Args:
            value: Transformed value. (B, H, T_2, d_k)
            scores: Attention score. (B, H, T_1, T_2)
            mask: Source mask. (B, T_2)
            chunk_mask: Chunk mask. (T_1, T_1)

        Returns:
           attn_output: Transformed value weighted by attention score. (B, T_1, H * d_k)

        r   r+   r=   Nz-infrI   )Údimr   )rD   rB   Úmasked_fillÚfloatr   Úsoftmaxr#   r"   r@   r   rA   Ú
contiguousrK   r   r   )r$   rO   rU   rV   rW   r0   Úattn_outputr'   r'   r(   Úforward_attention¶   s   

ýz1RelPositionMultiHeadedAttention.forward_attentionc                 C   s8   |   |||¡\}}	}
| j||	||d}| j|
|||dS )a  Compute scaled dot product attention with rel. positional encoding.

        Args:
            query: Query tensor. (B, T_1, size)
            key: Key tensor. (B, T_2, size)
            value: Value tensor. (B, T_2, size)
            pos_enc: Positional embedding tensor. (B, 2 * T_1 - 1, size)
            mask: Source mask. (B, T_2)
            chunk_mask: Chunk mask. (T_1, T_1)
            left_context: Number of frames in left context.

        Returns:
            : Output tensor. (B, T_1, H * d_k)

        r?   )rW   )rT   r   r^   )r$   r:   r;   rO   r<   rV   rW   r*   rQ   rR   rS   rU   r'   r'   r(   ÚforwardÝ   s   z'RelPositionMultiHeadedAttention.forward)r   F)r   )N)Nr   )Ú__name__Ú
__module__Ú__qualname__Ú__doc__ÚintrZ   Úboolr   r   r   r9   r   r    r   rT   r   r^   r_   Ú__classcell__r'   r'   r%   r(   r   	   s     ûþýüûú)ûþýüû
ú%ûþýüû
ú ÿÿÿ
þ)ûþýüû
ú.øþýüûúùø	÷r   )	rc   rE   Útypingr   r   r   r   ÚModuler   r'   r'   r'   r(   Ú<module>   s
    