o
    i                     @   sD   d Z ddlmZmZmZ ddlZddlmZ G dd dejj	Z
dS )z'ConvInput block for Transducer encoder.    )OptionalTupleUnionN)sub_factor_to_paramsc                       s   e Zd ZdZ			ddedeeef deded	ee d
df fddZ	de
jdee
j d
ee
je
jf fddZde
jd
e
jfddZde
jd
e
jfddZded
efddZ  ZS )	ConvInputa  ConvInput module definition.

    Args:
        input_size: Input size.
        conv_size: Convolution size.
        subsampling_factor: Subsampling factor.
        vgg_like: Whether to use a VGG-like network.
        output_size: Block output dimension.

       TN
input_size	conv_sizesubsampling_factorvgg_likeoutput_sizereturnc                    s~  t    |rc|\}}tjtjjd|ddddtj tjj||ddddtj tjdtjj||ddddtj tjj||ddddtj tjd
| _||d d  }d| _	| j
| _n7t||\}	}
}tjtjd|ddtj tj|||	|
tj | _|| }|| _	|	| _|
| _| j| _|| _|dk rd	nd
| _|durtj||| _|| _dS d| _|| _dS )zConstruct a ConvInput object.      )stridepadding)r      )r   r   r   r            N)super__init__torchnn
SequentialConv2dReLU	MaxPool2dconvr
   create_new_vgg_maskcreate_new_maskr   kernel_2stride_2create_new_conv2d_maskr   min_frame_lengthLinearoutputr   )selfr   r	   r
   r   r   
conv_size1
conv_size2output_projr!   r"   conv_2_output_size	__class__ d/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/asr_transducer/encoder/blocks/conv_input.pyr      sP   
	





zConvInput.__init__xmaskc                 C   sl   |  |d}| \}}}}|dd |||| }| jdur)| |}|dur2| |}||fS )a*  Encode input sequences.

        Args:
            x: ConvInput input sequences. (B, T, D_feats)
            mask: Mask of input sequences. (B, 1, T)

        Returns:
            x: ConvInput output sequences. (B, sub(T), D_out)
            mask: Mask of output sequences. (B, 1, sub(T))

        r   r   N)r   	unsqueezesize	transpose
contiguousviewr&   r    )r'   r0   r1   bctfr.   r.   r/   forwardU   s   


zConvInput.forwardc                 C   s   | d| dd  }|ddd|f dddddf }| d| dd  }|ddd|f dddddf }|S )zCreate a new mask for VGG output sequences.

        Args:
            mask: Mask of input sequences. (B, T)

        Returns:
            mask: Mask of output sequences. (B, sub(T))

        r   r   Nr   )r3   )r'   r1   
vgg1_t_len
vgg2_t_lenr.   r.   r/   r   p   s
   
&&zConvInput.create_new_vgg_maskc                 C   s2   |dddddf ddd| j d  | jf S )zCreate new conformer mask for Conv2d output sequences.

        Args:
            mask: Mask of input sequences. (B, T)

        Returns:
            mask: Mask of output sequences. (B, sub(T))

        Nr   r   )r!   r"   )r'   r1   r.   r.   r/   r#      s   2
z ConvInput.create_new_conv2d_maskr3   c                 C   s2   | j r|d d d S |d d | jd | j  S )zReturn the original size before subsampling for a given size.

        Args:
            size: Number of frames after subsampling.

        Returns:
            : Number of frames before subsampling.

        r   r   r   )r   r!   r"   )r'   r3   r.   r.   r/   get_size_before_subsampling   s   
z%ConvInput.get_size_before_subsampling)r   TN)__name__
__module____qualname____doc__intr   r   boolr   r   r   Tensorr;   r   r#   r?   __classcell__r.   r.   r,   r/   r   
   s8    
?
r   )rC   typingr   r   r   r   espnet2.asr_transducer.utilsr   r   Moduler   r.   r.   r.   r/   <module>   s
    