o
    %ݫi`%                     @   s   d Z ddlZddlmZ ddlmZ ddlmZmZ ddl	m
Z
 ddlmZ ddlmZ dd	lmZ G d
d de
ZG dd dejjZG dd dejjZdS )zuThe SpeechBrain implementation of ContextNet by
https://arxiv.org/pdf/2005.03191.pdf

Authors
 * Jianyuan Zhong 2020
    N)Dropout)Swish)Conv1dDepthwiseSeparableConv1d)
Sequential)Linear)BatchNorm1d)AdaptivePoolc                       sB   e Zd ZdZdddddddddd	eejjedf fd
d	Z	  Z
S )
ContextNetaa  This class implements the ContextNet.

    Reference paper: https://arxiv.org/pdf/2005.03191.pdf

    Arguments
    ---------
    input_shape : tuple
        Expected shape of the inputs.
    out_channels : int
        Number of output channels of this model (default 640).
    conv_channels : Optional (list[int])
        Number of output channels for each of the contextnet block. If not provided, it will be initialized as the default setting of above mentioned paper.
    kernel_size : int
        Kernel size of convolution layers (default 3).
    strides: Optional (list[int])
        Striding factor for each context block. This stride is applied at the last convolution layer at each context block. If not provided, it will be initialize as the default setting of above paper.
    num_blocks : int
        Number of context block (default 21).
    num_layers : int
        Number of depthwise convolution layers for each context block (default 5).
    inner_dim : int
        Inner dimension of bottle-neck network of the SE Module (default 12).
    alpha : float
        The factor to scale the output channel of the network (default 1).
    beta : float
        Beta to scale the Swish activation (default 1).
    dropout : float
        Dropout (default 0.15).
    activation : torch class
        Activation function for each context block (default Swish).
    se_activation : torch class
        Activation function for SE Module (default torch.nn.Sigmoid).
    norm : torch class
        Normalization to regularize the model (default BatchNorm1d).
    residuals : Optional (list[bool])
        Whether to apply residual connection at each context block (default None).


    Example
    -------
    >>> inp = torch.randn([8, 48, 40])
    >>> block = ContextNet(input_shape=inp.shape, num_blocks=14)
    >>> out = block(inp)
    >>> out.shape
    torch.Size([8, 6, 640])
    i  N               333333?c                    s`  t  j|d |d u rg dgd dgd }|d u r,dg| }d|d< d|d< d|d	< |d u r5d
g| }| jt|d |dd | j|dd t|trV| j||
dd n| j| dd t|D ]#}t|| |	 }| jt|||||| |
|||||| d| d qb| jt||dd | j|dd t|tr| j||
dd d S | j| dd d S )Ninput_shape   
   i      r            Tr   
conv_start)
layer_name
norm_start	act_startblock_)out_channelskernel_size
num_layers	inner_dimstridebetadropout
activationse_activationnormresidualr   conv_endnorm_endact_end)	super__init__appendr   
isinstancer   rangeintContextNetBlock)selfr   r   conv_channelsr   strides
num_blocksr    r!   alphar#   r$   r%   r&   r'   	residualsichannels	__class__ W/home/ubuntu/.local/lib/python3.10/site-packages/speechbrain/lobes/models/ContextNet.pyr-   C   s^   



zContextNet.__init__)__name__
__module____qualname____doc__r   torchnnSigmoidr   r-   __classcell__r=   r=   r;   r>   r
      s"    2r
   c                       s2   e Zd ZdZejjef fdd	Zdd Z	  Z
S )SEmodulea|  This class implements the Squeeze-and-Excitation module.

    Arguments
    ---------
    input_shape : tuple
        Expected shape of the inputs.
    inner_dim : int
        Inner dimension of bottle-neck network of the SE Module (default 12).
    activation : torch class
        Activation function for SE Module (default torch.nn.Sigmoid).
    norm : torch class
        Normalization to regularize the model (default BatchNorm1d).

    Example
    -------
    >>> inp = torch.randn([8, 120, 40])
    >>> net = SEmodule(input_shape=inp.shape, inner_dim=64)
    >>> out = net(inp)
    >>> out.shape
    torch.Size([8, 120, 40])
    c                    s   t    || _|| _|| _|\}}}t|d| _| jjt|ddd | j| j | j|   t	d| _
tt|d | jd|  t| j|d|  | _d S )Nr   r   )r   r   r"   )
input_size	n_neurons)r,   r-   r!   r'   r%   r   convr.   r   r	   avg_poolr   
bottleneck)r3   r   r!   r%   r'   bztchnr;   r=   r>   r-      s$   



zSEmodule.__init__c                 C   s@   |j \}}}| |}| |}| |}|d|d}|| S ):Processes the input tensor x and returns an output tensor.r   )shaperK   rL   rM   repeat)r3   xrN   rO   rP   avgcontextr=   r=   r>   forward   s   


zSEmodule.forward)r?   r@   rA   rB   rC   rD   rE   r   r-   rW   rF   r=   r=   r;   r>   rG      s    rG   c                       sD   e Zd ZdZdddeejjedf fdd	Z	dd Z
d	d
 Z  ZS )r2   a&  This class implements a block in ContextNet.

    Arguments
    ---------
    out_channels : int
        Number of output channels of this model (default 640).
    kernel_size : int
        Kernel size of convolution layers (default 3).
    num_layers : int
        Number of depthwise convolution layers for this context block (default 5).
    inner_dim : int
        Inner dimension of bottle-neck network of the SE Module (default 12).
    input_shape : tuple
        Expected shape of the inputs.
    stride : int
        Striding factor for this context block (default 1).
    beta : float
        Beta to scale the Swish activation (default 1).
    dropout : float
        Dropout (default 0.15).
    activation : torch class
        Activation function for this context block (default Swish).
    se_activation : torch class
        Activation function for SE Module (default torch.nn.Sigmoid).
    norm : torch class
        Normalization to regularize the model (default BatchNorm1d).
    residual : bool
        Whether to apply residual connection at this context block (default None).

    Example
    -------
    >>> inp = torch.randn([8, 120, 40])
    >>> block = ContextNetBlock(256, 3, 5, 12, input_shape=inp.shape, stride=2)
    >>> out = block(inp)
    >>> out.shape
    torch.Size([8, 60, 256])
    r   r   Tc                    s   t    || _t|d| _t|D ]}| jjt||||d kr"|ndd | j| qt| j	 ||
|d| _
t|| _d | _|rYt|d| _| jjt|d|d | j| t|	trd|	|| _n|	 | _|   d S )Nr   r   )r"   )r   r!   r%   r'   r   )r   r"   )r,   r-   r(   r   Convsr0   r.   r   rG   get_output_shapeSEr   dropreduced_covr   r/   r   r%   _reset_params)r3   r   r   r    r!   r   r"   r#   r$   r%   r&   r'   r(   r9   r;   r=   r>   r-      s:   


zContextNetBlock.__init__c                 C   s<   |  |}| |}| jr|| | }| |}| |S )rQ   )rX   rZ   r\   r%   r[   )r3   rT   outr=   r=   r>   rW   $  s   



zContextNetBlock.forwardc                 C   s,   |   D ]}| dkrtjj| qd S )Nr   )
parametersdimrC   rD   initkaiming_normal_)r3   pr=   r=   r>   r]   -  s
   zContextNetBlock._reset_params)r?   r@   rA   rB   r   rC   rD   rE   r   r-   rW   r]   rF   r=   r=   r;   r>   r2      s    -2	r2   )rB   rC   torch.nnr   speechbrain.nnet.activationsr   speechbrain.nnet.CNNr   r   speechbrain.nnet.containersr   speechbrain.nnet.linearr   speechbrain.nnet.normalizationr   speechbrain.nnet.poolingr	   r
   rD   ModulerG   r2   r=   r=   r=   r>   <module>   s    z>