o
    ´©iB(  ã                   @   sX   d Z ddlZddlmZ ddlmZ ddlmZ G dd„ dejƒZG dd	„ d	ejƒZ	dS )
z(Encoder self-attention layer definition.é    N)Únn)Ú	LayerNorm)ÚVariablec                       s2   e Zd ZdZ			d
‡ fdd„	Zddd	„Z‡  ZS )ÚEncoder_Conformer_LayeráÃ  Encoder layer module.

    Args:
        size (int): Input dimension.
        self_attn (torch.nn.Module): Self-attention module instance.
            `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` instance
            can be used as the argument.
        feed_forward (torch.nn.Module): Feed-forward module instance.
            `PositionwiseFeedForward`, `MultiLayeredConv1d`, or `Conv1dLinear` instance
            can be used as the argument.
        feed_forward_macaron (torch.nn.Module): Additional feed-forward module instance.
            `PositionwiseFeedForward`, `MultiLayeredConv1d`, or `Conv1dLinear` instance
            can be used as the argument.
        conv_module (torch.nn.Module): Convolution module instance.
            `ConvlutionModule` instance can be used as the argument.
        dropout_rate (float): Dropout rate.
        normalize_before (bool): Whether to use layer_norm before the first block.
        concat_after (bool): Whether to concat attention layer's input and output.
            if True, additional linear will be applied.
            i.e. x -> x + linear(concat(x, att(x)))
            if False, no additional linear will be applied. i.e. x -> x + att(x)

    TFr   c
           
         s¼   t t| ƒ ¡  || _|| _|| _|| _t|ƒ| _t|ƒ| _	|dur*t|ƒ| _
d| _nd| _| jdur<t|ƒ| _t|ƒ| _t |¡| _|| _|| _|| _|	| _| jr\t || |¡| _dS dS )z,Construct an Encoder_Conformer_Layer object.Ng      à?g      ð?)Úsuperr   Ú__init__Ú	self_attnÚfeed_forwardÚfeed_forward_macaronÚconv_moduler   Únorm_ffÚnorm_mhaÚnorm_ff_macaronÚff_scaleÚ	norm_convÚ
norm_finalr   ÚDropoutÚdropoutÚsizeÚnormalize_beforeÚconcat_afterÚcca_posÚLinearÚconcat_linear)
Úselfr   r	   r
   r   r   Údropout_rater   r   r   ©Ú	__class__© ú[/home/ubuntu/.local/lib/python3.10/site-packages/funasr/models/mfcca/encoder_layer_mfcca.pyr   +   s,   





ÿz Encoder_Conformer_Layer.__init__Nc           
      C   sr  t |tƒr|d |d }}n|d}}| jdur8|}| jr#|  |¡}|| j|  |  |¡¡  }| js8|  |¡}|}| jrB|  |¡}|du rI|}n@|j|jd |jd d | j	fks\J ‚|dd…dd…dd…f }|dd…dd…dd…f }|du r|dn|dd…dd…dd…f }| j
dk r¥|durœ|  |||||¡}n|  ||||¡}n|  ||||¡}| jrÁtj||fdd}	||  |	¡ }n||  |¡ }| jsÐ|  |¡}| jdurñ|}| jrß|  |¡}||  |  |¡¡ }| jsñ|  |¡}|}| jrû|  |¡}|| j|  |  |¡¡  }| js|  |¡}| jdur|  |¡}|dur*tj||gdd}|dur5||f|fS ||fS )á?  Compute encoded features.

        Args:
            x_input (Union[Tuple, torch.Tensor]): Input tensor w/ or w/o pos emb.
                - w/ pos emb: Tuple of tensors [(#batch, time, size), (1, time, size)].
                - w/o pos emb: Tensor (#batch, time, size).
            mask (torch.Tensor): Mask tensor for the input (#batch, time).
            cache (torch.Tensor): Cache tensor of the input (#batch, time - 1, size).

        Returns:
            torch.Tensor: Output tensor (#batch, time, size).
            torch.Tensor: Mask tensor (#batch, time).

        r   é   Néÿÿÿÿé   )Údim)Ú
isinstanceÚtupler   r   r   r   r   r   Úshaper   r   r	   r   ÚtorchÚcatr   r   r   r   r
   r   )
r   Úx_inputÚmaskÚcacheÚxÚpos_embÚresidualÚx_qÚx_attÚx_concatr   r   r    ÚforwardP   sd   





&&









zEncoder_Conformer_Layer.forward)TFr   ©N©Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r4   Ú__classcell__r   r   r   r    r      s     ö%r   c                       s0   e Zd ZdZ		d	‡ fdd„	Zd
dd„Z‡  ZS )ÚEncoderLayerr   TFc
           
         sJ   t t| ƒ ¡  || _t||||||||	dd	| _t|ƒ| _t 	|¡| _
dS )z!Construct an EncoderLayer object.r   )r   N)r   r<   r   Úencoder_cros_channel_attenr   Úencoder_csar   r   r   r   r   )
r   r   Úself_attn_cros_channelÚself_attn_conformerÚfeed_forward_csaÚfeed_forward_macaron_csaÚconv_module_csar   r   r   r   r   r    r   ¿   s   ÷
zEncoderLayer.__init__Nc              	   C   sÆ  t |tƒr|d |d }}n|d}}|}|  |¡}| d¡}| d¡}	| d|||	¡ dd¡}
|
 |
 d¡|
 d¡d|
 d¡|
 d¡¡}tt 	|
 d¡d|
 d¡|
 d¡¡ƒ 
|
 
¡ ¡}tt 	|
 d¡d|
 d¡|
 d¡¡ƒ 
|
 
¡ ¡}t ||
|gd¡}|dd…dd…dd…dd…f |dd…dd…ddd…dd…f< |dd…dd	…dd…dd…f |dd…dd…ddd…dd…f< |dd…dd
…dd…dd…f |dd…dd…ddd…dd…f< |dd…dd…dd…dd…f |dd…dd…ddd…dd…f< |dd…dd…dd…dd…f |dd…dd…ddd…dd…f< |
 d||	¡}
| dd| |	¡}|  |
||d¡}| d|||	¡ dd¡ d||	¡}||  |¡ }|durT||f}n|}|  ||¡\}}|||fS )r!   r   r"   Nr$   r#   é   é   éüÿÿÿéýÿÿÿéþÿÿÿé   )r&   r'   r   r   ÚreshapeÚ	transposeÚnewr   r)   ÚzerosÚtyper*   r=   r   r>   )r   r+   r,   Úchannel_sizer-   r.   r/   r0   Út_lengÚd_dimÚx_newÚx_k_vÚ
pad_beforeÚ	pad_afterÚx_padr2   r   r   r    r4   Ý   sD   




*&ÿ&ÿ>>>>>
ý


zEncoderLayer.forward)TFr5   r6   r   r   r   r    r<   ¦   s    !ör<   )
r:   r)   r   Ú$funasr.models.transformer.layer_normr   Útorch.autogradr   ÚModuler   r<   r   r   r   r    Ú<module>   s    