o
    i                     @   sZ   d Z ddlZddlZddlmZ ddlm  mZ eeej	j
ZG dd dejZdS )zDynamic Convolution module.    N)nnc                       s.   e Zd ZdZ		d fdd	Zdd Z  ZS )DynamicConvolutiona  Dynamic Convolution layer.

    This implementation is based on
    https://github.com/pytorch/fairseq/tree/master/fairseq

    Args:
        wshare (int): the number of kernel of convolution
        n_feat (int): the number of features
        dropout_rate (float): dropout_rate
        kernel_size (int): kernel size (length)
        use_kernel_mask (bool): Use causal mask or not for convolution kernel
        use_bias (bool): Use bias term or not.

    Fc                    s   t t|   || dksJ || _|| _|| _|| _d| _t	||d | _
t	||| _t	|| jd | | _tj| jj t | _|| _| jrXtt|| _dS dS )z$Construct Dynamic Convolution layer.r   N      )superr   __init__wshareuse_kernel_maskdropout_ratekernel_sizeattnr   Linearlinear1linear2linear_weightinitxavier_uniformweightGLUactuse_bias	ParametertorchTensorbias)selfr   n_featr
   r   r	   r   	__class__ `/home/ubuntu/.local/lib/python3.10/site-packages/funasr/models/transformer/utils/dynamic_conv.pyr      s    

zDynamicConvolution.__init__c                 C   s2  |}|  \}}}| j}	| j}
| |}| |}| |}tj|| j| j	d}|
|||	|
dd }tj||	 | ||
 d  |jd}|
||	|||
 d td}||j}|||	||
f||
 d | |	 ||
 d | ||
 df| |dt|
d d |}| jrttj|||jdd}||d	ktd}tj|dd
}|| _|
||	 ||}|dd }|
||	 t||	 |dd}t||}|dd 
|||}| j r|| j!
ddd }|dd}|dur| js|dd}||dkd	}| "|}|S )a&  Forward of 'Dynamic Convolution'.

        This function takes query, key and value but uses only quert.
        This is just for compatibility with self-attention layer (attention.py)

        Args:
            query (torch.Tensor): (batch, time1, d_model) input tensor
            key (torch.Tensor): (batch, time2, d_model) NOT USED
            value (torch.Tensor): (batch, time2, d_model) NOT USED
            mask (torch.Tensor): (batch, time1, time2) mask

        Return:
            x (torch.Tensor): (batch, time1, d_model) output

        )trainingr   r   )dtypez-inf)devicer   g        )dimN)#sizer   r   r   r   r   Fdropoutr
   r!   view	transpose
contiguousr   zerosr"   fill_floattor$   
as_stridedcopy_narrowintr	   trilones	unsqueezemasked_fillsoftmaxr   bmmr   r   r   )r   querykeyvaluemaskxBTCHkr   
weight_newkernel_maskr   r   r    forward=   sD   


$"<"
zDynamicConvolution.forward)FF)__name__
__module____qualname____doc__r   rG   __classcell__r   r   r   r    r      s    !r   )rK   numpyr   r   torch.nn.functional
functionalr(   r/   finfofloat32min	MIN_VALUEModuler   r   r   r   r    <module>   s    