o
    i                     @   sL   d Z ddlmZmZmZ ddlZG dd dejjZG dd dejjZ	dS )z(Convolution modules for X-former blocks.    )DictOptionalTupleNc                       s   e Zd ZdZej i dfdededejjde	de
dd	f fd
dZ			ddejdeej dedeejejf fddZ  ZS )ConformerConvolutionaH  ConformerConvolution module definition.

    Args:
        channels: The number of channels.
        kernel_size: Size of the convolving kernel.
        activation: Type of activation function.
        norm_args: Normalization module arguments.
        causal: Whether to use causal convolution (set to True if streaming).

    Fchannelskernel_size
activation	norm_argscausalreturnNc                    s   t    |d d dksJ || _tjj|d| dddd| _|r*|d | _d}n	d| _|d d }tjj|||d||d| _tjj	|fi || _
tjj||dddd| _|| _dS )z)Construct an ConformerConvolution object.      r   )r   stridepaddingr   r   groupsN)super__init__r   torchnnConv1dpointwise_conv1lorderdepthwise_convBatchNorm1dnormpointwise_conv2r   )selfr   r   r   r	   r
   r   	__class__ f/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/asr_transducer/encoder/modules/convolution.pyr      s@   
	

zConformerConvolution.__init__r   xcacheright_contextc                 C   s   |  |dd}tjjj|dd}| jdkrX|du r)tjj|| jdfdd}n/tj||gdd}|dkrI|dddd| j|  | f }n|dddd| j df }| 	|}| 
| |}| |dd}||fS )a  Compute convolution module.

        Args:
            x: ConformerConvolution input sequences. (B, T, D_hidden)
            cache: ConformerConvolution input cache. (1, conv_kernel, D_hidden)
            right_context: Number of frames in right context.

        Returns:
            x: ConformerConvolution output sequences. (B, T, D_hidden)
            cache: ConformerConvolution output cache. (1, conv_kernel, D_hidden)

        r   r   dimr   Nconstant        )r   	transposer   r   
functionalglur   padcatr   r   r   r   )r   r"   r#   r$   r    r    r!   forwardE   s   
&
zConformerConvolution.forwardNr   )__name__
__module____qualname____doc__r   r   ReLUintModuler   boolr   Tensorr   r   r.   __classcell__r    r    r   r!   r      s:    4r   c                       s   e Zd ZdZejji ddfdededejjde	de
d	ed
df fddZ		ddejdeej ded
eejejf fddZ  ZS )ConvolutionalSpatialGatingUnita  Convolutional Spatial Gating Unit module definition.

    Args:
        size: Initial size to determine the number of channels.
        kernel_size: Size of the convolving kernel.
        norm_class: Normalization module class.
        norm_args: Normalization module arguments.
        dropout_rate: Dropout rate.
        causal: Whether to use causal convolution (set to True if streaming).

    r(   Fsizer   
norm_classr	   dropout_rater
   r   Nc           	         s   t    |d }|| _|r|d | _d}n	d| _|d d }tjj|||d||d| _||fi || _tj	 | _
tj|| _dS )z2Construct a ConvolutionalSpatialGatingUnit object.r   r   r   r   N)r   r   r   r   r   r   r   convr   Identityr   Dropoutdropout)	r   r;   r   r<   r	   r=   r
   r   r   r   r    r!   r   z   s&   


	z'ConvolutionalSpatialGatingUnit.__init__r   r"   r#   r$   c                 C   s   |j ddd\}}| |dd}| jdkrX|du r)tjj|| jdfdd}n/tj||gdd}|dkrI|dddd| j|  | f }n|dddd| j df }| 	|dd}| 
|| | }||fS )	a  Compute convolution module.

        Args:
            x: ConvolutionalSpatialGatingUnit input sequences. (B, T, D_hidden)
            cache: ConvolutionalSpationGatingUnit input cache.
                   (1, conv_kernel, D_hidden)
            right_context: Number of frames in right context.

        Returns:
            x: ConvolutionalSpatialGatingUnit output sequences. (B, T, D_hidden // 2)

        r   r%   r   r   Nr'   r(   )chunkr   r)   r   r   r   r*   r,   r-   r>   rA   r   )r   r"   r#   r$   x_rx_gr    r    r!   r.      s   
&z&ConvolutionalSpatialGatingUnit.forwardr/   )r0   r1   r2   r3   r   r   	LayerNormr5   r6   r   floatr7   r   r8   r   r   r.   r9   r    r    r   r!   r:   m   s@    (r:   )
r3   typingr   r   r   r   r   r6   r   r:   r    r    r    r!   <module>   s
    e