o
    i                     @   sZ   d Z ddlZddlZddlmZ ddlm  mZ eeej	j
ZG dd dejZdS )zLightweight Convolution Module.    N)nnc                       s.   e Zd ZdZ		d fdd	Zdd Z  ZS )LightweightConvolutiona  Lightweight Convolution layer.

    This implementation is based on
    https://github.com/pytorch/fairseq/tree/master/fairseq

    Args:
        wshare (int): the number of kernel of convolution
        n_feat (int): the number of features
        dropout_rate (float): dropout_rate
        kernel_size (int): kernel size (length)
        use_kernel_mask (bool): Use causal mask or not for convolution kernel
        use_bias (bool): Use bias term or not.

    Fc           	         s   t t|   || dksJ || _|| _|| _|| _t|d | _t	
||d | _t	
||| _t	 | _t	t| jd|dd| _|| _| jrVt	t|| _t| jt|d }t| jt|d d }tj||fddd| _dS )z(Construct Lightweight Convolution layer.r         dimN)superr   __init__wshareuse_kernel_maskdropout_ratekernel_sizeintpadding_sizer   Linearlinear1linear2GLUact	ParametertorchTensoruniform_weightuse_biasbiaszerosonescat	unsqueezekernel_mask)	selfr   n_featr   r   r   r   kernel_mask0kernel_mask1	__class__ ]/home/ubuntu/.local/lib/python3.10/site-packages/funasr/models/transformer/utils/lightconv.pyr
      s"   

 zLightweightConvolution.__init__c                 C   s  |}|  \}}}| j}	| |}| |}|dd d|	|}tj| j	| j
| jd}
| jrD| j|j| _|
| jdktd}
tj|
dd}
tj||
| j| jd|||}| jrh|| jddd }|dd}|d	ur| js|dd
}||dkd}| |}|S )a*  Forward of 'Lightweight Convolution'.

        This function takes query, key and value but uses only query.
        This is just for compatibility with self-attention layer (attention.py)

        Args:
            query (torch.Tensor): (batch, time1, d_model) input tensor
            key (torch.Tensor): (batch, time2, d_model) NOT USED
            value (torch.Tensor): (batch, time2, d_model) NOT USED
            mask (torch.Tensor): (batch, time1, time2) mask

        Return:
            x (torch.Tensor): (batch, time1, d_model) output

        r   r   r   )trainingg        z-infr   )paddinggroupsNr   )sizer   r   r   	transpose
contiguousviewFdropoutr   r   r*   r   r!   todevicemasked_fillfloatsoftmaxconv1dr   r   r   r   )r"   querykeyvaluemaskxBTCHr   r(   r(   r)   forward?   s(   

 
zLightweightConvolution.forward)FF)__name__
__module____qualname____doc__r
   rC   __classcell__r(   r(   r&   r)   r      s    #r   )rG   numpyr   r   torch.nn.functional
functionalr2   r7   finfofloat32min	MIN_VALUEModuler   r(   r(   r(   r)   <module>   s    