o
    ix                     @   s8   d Z ddlmZmZmZ ddlZG dd dejjZdS )z'Conformer block for Transducer encoder.    )DictOptionalTupleNc                       s   e Zd ZdZejji dfdedejjdejjdejjdejjdejjd	e	d
e
ddf fddZdedejddfddZ	ddejdejdejdeej deejejejf f
ddZ		ddejdejdejdededeejejf fddZ  ZS )	Conformera  Conformer module definition.

    Args:
        block_size: Input/output size.
        self_att: Self-attention module instance.
        feed_forward: Feed-forward module instance.
        feed_forward_macaron: Feed-forward module instance for macaron network.
        conv_mod: Convolution module instance.
        norm_class: Normalization module class.
        norm_args: Normalization module arguments.
        dropout_rate: Dropout rate.

    g        
block_sizeself_attfeed_forwardfeed_forward_macaronconv_mod
norm_class	norm_argsdropout_ratereturnNc	           	         s   t    || _|| _|| _d| _|| _||fi || _||fi || _||fi || _	||fi || _
||fi || _tj|| _|| _d| _dS )zConstruct a Conformer object.g      ?N)super__init__r   r   r	   feed_forward_scaler
   norm_feed_forwardnorm_self_attnorm_macaron	norm_conv
norm_finaltorchnnDropoutdropoutr   cache)	selfr   r   r   r	   r
   r   r   r   	__class__ c/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/asr_transducer/encoder/blocks/conformer.pyr      s   

zConformer.__init__left_contextdevicec                 C   s:   t jd|| jf|dt jd| j| jjd f|dg| _dS )zInitialize/Reset self-attention and convolution modules cache for streaming.

        Args:
            left_context: Number of left frames during chunk-by-chunk inference.
            device: Device to use for cache tensor.

           )r"   N)r   zerosr   r
   kernel_sizer   )r   r!   r"   r   r   r    reset_streaming_cache9   s   	


zConformer.reset_streaming_cachexpos_encmask
chunk_maskc                 C   s   |}|  |}|| j| | |  }|}| |}|}|| | j||||||d }|}| |}| |\}}|| | }|}| |}|| j| | 	|  }| 
|}|||fS )a  Encode input sequences.

        Args:
            x: Conformer input sequences. (B, T, D_block)
            pos_enc: Positional embedding sequences. (B, 2 * (T - 1), D_block)
            mask: Source mask. (B, T)
            chunk_mask: Chunk mask. (T_2, T_2)

        Returns:
            x: Conformer output sequences. (B, T, D_block)
            mask: Source mask. (B, T)
            pos_enc: Positional embedding sequences. (B, 2 * (T - 1), D_block)

        )r*   )r   r   r   r	   r   r   r   r
   r   r   r   )r   r'   r(   r)   r*   residualx_q_r   r   r    forwardP   s6   






zConformer.forwardr   right_contextc              	   C   s*  |}|  |}|| j| |  }|}| |}|dkr)tj| jd |gdd}n|}|}|dkrC|dd||  | ddf }	n|dd| dddf }	|| j||||||d }|}| |}| j	|| jd |d\}}
|| }|}| 
|}|| j| |  }| |}|	|
g| _||fS )a  Encode chunk of input sequence.

        Args:
            x: Conformer input sequences. (B, T, D_block)
            pos_enc: Positional embedding sequences. (B, 2 * (T - 1), D_block)
            mask: Source mask. (B, T_2)
            left_context: Number of frames in left context.
            right_context: Number of frames in right context.

        Returns:
            x: Conformer output sequences. (B, T, D_block)
            pos_enc: Positional embedding sequences. (B, 2 * (T - 1), D_block)

        r   r#   )dimN)r!   )r   r/   )r   r   r	   r   r   catr   r   r   r
   r   r   r   )r   r'   r(   r)   r!   r/   r+   keyval	att_cache
conv_cacher   r   r    chunk_forward   s@   

$	




zConformer.chunk_forward)N)r   r   )__name__
__module____qualname____doc__r   r   	LayerNormintModuler   floatr   r"   r&   Tensorr   r   r.   r6   __classcell__r   r   r   r    r      sh    	
"
?r   )	r:   typingr   r   r   r   r   r=   r   r   r   r   r    <module>   s    