o
    %ݫi &                     @   s   d Z ddlZddlmZmZ ddlmZ ddlmZ ddl	m
Z
mZ G dd dejjZG d	d
 d
eZG dd dejjZdS )zThis is a module to ensemble a convolution (depthwise) encoder with or without residual connection.

Authors
 * Jianyuan Zhong 2020
 * Titouan Parcollet 2023
    N)Conv1dConv2d)
Sequential)	LayerNorm)FilterPropertiesstack_filter_propertiesc                       s6   e Zd ZdZdddejjf fdd	Zdd Z  Z	S )	ConvolutionalSpatialGatingUnita  This module implementing CSGU as defined in:
    Branchformer: Parallel MLP-Attention Architectures
    to Capture Local and Global Context for Speech Recognition
    and Understanding"

    The code is heavily inspired from the original ESPNet
    implementation.

    Arguments
    ---------
    input_size: int
        Size of the feature (channel) dimension.
    kernel_size: int, optional
        Size of the kernel
    dropout: float, optional
        Dropout rate to be applied at the output
    use_linear_after_conv: bool, optional
        If True, will apply a linear transformation of size input_size//2
    activation: torch.class, optional
        Activation function to use on the gate, default is Identity.

    Example
    -------
    >>> x = torch.rand((8, 30, 10))
    >>> conv = ConvolutionalSpatialGatingUnit(input_size=x.shape[-1])
    >>> out = conv(x)
    >>> out.shape
    torch.Size([8, 30, 5])
       g        Fc              
      s   t    || _|| _| | _| jd dkrtd|d }t|| _td d |f||dd|ddd| _	| jrRt
j||| _t
jjj| jjd	d
 t
jj| jj t
jj| j	j	j t
j|| _d S )N   r   z"Input size must be divisible by 2!   samenormalF)input_shapeout_channelskernel_sizestridepaddinggroups	conv_initskip_transposegư>)std)super__init__
input_sizeuse_linear_after_conv
activation
ValueErrorr   normr   convtorchnnLinearlinearinitnormal_weightones_biasDropoutdropout)selfr   r   r)   r   r   
n_channels	__class__ X/home/ubuntu/.local/lib/python3.10/site-packages/speechbrain/lobes/models/convolution.pyr   2   s0   

z'ConvolutionalSpatialGatingUnit.__init__c                 C   sN   |j ddd\}}| |}| |}| jr| |}| |}| || S )z
        Arguments
        ---------
        x: torch.Tensor
            Input tensor, shape (B, T, D)

        Returns
        -------
        out: torch.Tensor
            The processed outputs.
        r
   )dim)chunkr   r   r   r"   r   r)   )r*   xx1x2r.   r.   r/   forwardY   s   



z&ConvolutionalSpatialGatingUnit.forward)
__name__
__module____qualname____doc__r   r    Identityr   r6   __classcell__r.   r.   r,   r/   r      s    !'r   c                       sd   e Zd ZdZddg dg dg dg dg deejjed	d
ddf fdd	Z	de
fddZ  ZS )ConvolutionFrontEnda  This is a module to ensemble a convolution (depthwise) encoder with or
    without residual connection.

    Arguments
    ---------
    input_shape: tuple
        Expected shape of the input tensor.
    num_blocks: int
        Number of block (default 21).
    num_layers_per_block: int
        Number of convolution layers for each block (default 5).
    out_channels: Optional(list[int])
        Number of output channels for each of block.
    kernel_sizes: Optional(list[int])
        Kernel size of convolution blocks.
    strides: Optional(list[int])
        Striding factor for each block, this stride is applied at the last convolution layer at each block.
    dilations: Optional(list[int])
        Dilation factor for each block.
    residuals: Optional(list[bool])
        Whether apply residual connection at each block (default None).
    conv_module: class
        Class to use for constructing conv layers.
    activation: Callable
        Activation function for each block (default LeakyReLU).
    norm: torch class
        Normalization to regularize the model (default BatchNorm1d).
    dropout: float
        Dropout (default 0.1).
    conv_bias: bool
        Whether to add a bias term to convolutional layers.
    padding: str
        Type of padding to apply.
    conv_init: str
        Type of initialization to use for conv layers.

    Example
    -------
    >>> x = torch.rand((8, 30, 10))
    >>> conv = ConvolutionFrontEnd(input_shape=x.shape)
    >>> out = conv(x)
    >>> out.shape
    torch.Size([8, 8, 3, 512])
          )      i   )r>   r>   r>   )r   r
   r
   )r   r   r   )TTT皙?Tr   Nc                    sb   t  j|d t|D ]#}| jt||| || || || || |	|
||d| |||d qd S )Nr   
convblock_)
num_layersr   r   r   dilationresidualconv_moduler   r   r)   
layer_name	conv_biasr   r   )r   r   rangeappend	ConvBlock)r*   r   
num_blocksnum_layers_per_blockr   kernel_sizesstrides	dilations	residualsrH   r   r   r)   rJ   r   r   ir,   r.   r/   r      s(   zConvolutionFrontEnd.__init__returnc                 C   s   t dd |  D S )Nc                 s   s    | ]}|  V  qd S N)get_filter_properties).0blockr.   r.   r/   	<genexpr>   s    
z<ConvolutionFrontEnd.get_filter_properties.<locals>.<genexpr>)r   childrenr*   r.   r.   r/   rW      s   z)ConvolutionFrontEnd.get_filter_properties)r7   r8   r9   r:   r   r   r    	LeakyReLUr   r   r   rW   r<   r.   r.   r,   r/   r=   r   s$    0&r=   c                       sR   e Zd ZdZddddeejjdddddf fd	d
	Zdd Z	de
fddZ  ZS )rM   a  An implementation of convolution block with 1d or 2d convolutions (depthwise).

    Arguments
    ---------
    num_layers : int
        Number of depthwise convolution layers for this block.
    out_channels : int
        Number of output channels of this model (default 640).
    input_shape : tuple
        Expected shape of the input tensor.
    kernel_size : int
        Kernel size of convolution layers (default 3).
    stride : int
        Striding factor for this block (default 1).
    dilation : int
        Dilation factor.
    residual : bool
        Add a residual connection if True.
    conv_module : torch class
        Class to use when constructing conv layers.
    activation : Callable
        Activation function for this block.
    norm : torch class
        Normalization to regularize the model (default BatchNorm1d).
    dropout : float
        Rate to zero outputs at.
    conv_bias : bool
        Add a bias term to conv layers.
    padding : str
        The type of padding to add.
    conv_init : str
        Type of initialization to use for conv layers.

    Example
    -------
    >>> x = torch.rand((8, 30, 10))
    >>> conv = ConvBlock(2, 16, input_shape=x.shape)
    >>> out = conv(x)
    >>> x.shape
    torch.Size([8, 30, 10])
    r>   r   FNrB   Tr   c                    s$  t    t|d| _g | _t|D ]S}||d kr|nd}| jj|||||d| |||d	 | jt|||d |
d urJ| jj|
d| d | jj|	 d| d | jjtj	
|d	| d qd | _d | _|rt|d| _| jj||d|d
d | jj|
dd tj	
|| _d S d S )NrC   r   conv_)r   r   r   rF   rI   r'   r   r   )window_sizer   rF   norm_)rI   act_dropout_r   )r   r   r   rI   r   )r   r   r   convsfilter_propertiesrK   rL   r   r   r    r(   reduce_convdrop)r*   rE   r   r   r   r   rF   rG   rH   r   r   r)   rJ   r   r   rT   layer_strider,   r.   r/   r      sV   
zConvBlock.__init__c                 C   s,   |  |}| jr|| | }| |}|S )z:Processes the input tensor x and returns an output tensor.)rc   re   rf   )r*   r3   outr.   r.   r/   r6   5  s
   

zConvBlock.forwardrU   c                 C   s
   t | jS rV   )r   rd   r\   r.   r.   r/   rW   =  s   
zConvBlock.get_filter_properties)r7   r8   r9   r:   r   r   r    r]   r   r6   r   rW   r<   r.   r.   r,   r/   rM      s     />rM   )r:   r   speechbrain.nnet.CNNr   r   speechbrain.nnet.containersr   speechbrain.nnet.normalizationr   !speechbrain.utils.filter_analysisr   r   r    Moduler   r=   rM   r.   r.   r.   r/   <module>   s    _Z