o
    i<                     @   s8   d Z ddlmZmZmZ ddlZG dd dejjZdS )z*Branchformer block for Transducer encoder.    )DictOptionalTupleNc                       s   e Zd ZdZejji dfdededejjdejjdejjde	d	e
d
df fddZdedejd
dfddZ	ddejdejdejdeej d
eejejejf f
ddZ		ddejdejdejdeded
eejejf fddZ  ZS )Branchformera  Branchformer module definition.

    Reference: https://arxiv.org/pdf/2207.02971.pdf

    Args:
        block_size: Input/output size.
        linear_size: Linear layers' hidden size.
        self_att: Self-attention module instance.
        conv_mod: Convolution module instance.
        norm_class: Normalization class.
        norm_args: Normalization module arguments.
        dropout_rate: Dropout rate.

    g        
block_sizelinear_sizeself_attconv_mod
norm_class	norm_argsdropout_ratereturnNc                    s   t    || _|| _tjtj||tj | _	tj|d || _
tj|| || _||fi || _||fi || _||fi || _tj|| _|| _|| _d| _dS )z Construct a Branchformer object.   N)super__init__r   r	   torchnn
SequentialLinearGELUchannel_proj1channel_proj2
merge_projnorm_self_attnorm_mlp
norm_finalDropoutdropoutr   r   cache)selfr   r   r   r	   r
   r   r   	__class__ f/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/asr_transducer/encoder/blocks/branchformer.pyr      s   

zBranchformer.__init__left_contextdevicec                 C   s>   t jd|| jf|dt jd| jd | jjd f|dg| _dS )zInitialize/Reset self-attention and convolution modules cache for streaming.

        Args:
            left_context: Number of left frames during chunk-by-chunk inference.
            device: Device to use for cache tensor.

           )r%   r   N)r   zerosr   r   r	   kernel_sizer   )r   r$   r%   r"   r"   r#   reset_streaming_cache9   s   	


z"Branchformer.reset_streaming_cachexpos_encmask
chunk_maskc              
   C   s   |}|}|  |}| | j||||||d}| |}| |}| |\}}| |}| |}|| | tj	||gdd }| 
|}|||fS )a  Encode input sequences.

        Args:
            x: Branchformer input sequences. (B, T, D_block)
            pos_enc: Positional embedding sequences. (B, 2 * (T - 1), D_block)
            mask: Source mask. (B, T)
            chunk_mask: Chunk mask. (T_2, T_2)

        Returns:
            x: Branchformer output sequences. (B, T, D_block)
            mask: Source mask. (B, T)
            pos_enc: Positional embedding sequences. (B, 2 * (T - 1), D_block)

        )r,   r-   dim)r   r   r   r   r   r	   r   r   r   catr   )r   r*   r+   r,   r-   x1x2_r"   r"   r#   forwardP   s   




"

zBranchformer.forwardr   right_contextc                 C   s
  |}|}|  |}|dkrtj| jd |gdd}n|}|}	|dkr4|dd||  | ddf }
n|dd| dddf }
| j|||	|||d}| |}| |}| j|| jd |d\}}| |}|| 	tj||gdd }| 
|}|
|g| _||fS )a  Encode chunk of input sequence.

        Args:
            x: Branchformer input sequences. (B, T, D_block)
            pos_enc: Positional embedding sequences. (B, 2 * (T - 1), D_block)
            mask: Source mask. (B, T_2)
            left_context: Number of frames in left context.
            right_context: Number of frames in right context.

        Returns:
            x: Branchformer output sequences. (B, T, D_block)
            pos_enc: Positional embedding sequences. (B, 2 * (T - 1), D_block)

        r   r&   r/   N)r,   r$   )r   r6   r.   )r   r   r1   r   r   r   r   r	   r   r   r   )r   r*   r+   r,   r$   r6   r2   r3   keyval	att_cache
conv_cacher"   r"   r#   chunk_forward|   s*   
$





zBranchformer.chunk_forward)N)r   r   )__name__
__module____qualname____doc__r   r   	LayerNormintModuler   floatr   r%   r)   Tensorr   r   r5   r;   __classcell__r"   r"   r    r#   r      sd    	!
1r   )	r?   typingr   r   r   r   r   rB   r   r"   r"   r"   r#   <module>   s    