o
    %ݫi/L                     @   s   d Z ddlZddlmZ G dd dejZG dd dejZG dd dejZG d	d
 d
ejZG dd dejZ	G dd dejZ
G dd dejZG dd dejZdS )uv   Library implementing normalization.

Authors
 * Mirco Ravanelli 2020
 * Guillermo Cámbara 2021
 * Sarthak Yadav 2022
    Nc                       s:   e Zd ZdZ								d fdd	Zd	d
 Z  ZS )BatchNorm1daJ  Applies 1d batch normalization to the input tensor.

    Arguments
    ---------
    input_shape : tuple
        The expected shape of the input. Alternatively, use ``input_size``.
    input_size : int
        The expected size of the input. Alternatively, use ``input_shape``.
    eps : float
        This value is added to std deviation estimation to improve the numerical
        stability.
    momentum : float
        It is a value used for the running_mean and running_var computation.
    affine : bool
        When set to True, the affine parameters are learned.
    track_running_stats : bool
        When set to True, this module tracks the running mean and variance,
        and when set to False, this module does not track such statistics.
    combine_batch_time : bool
        When true, it combines batch an time axis.
    skip_transpose : bool
        Whether to skip the transposition.


    Example
    -------
    >>> input = torch.randn(100, 10)
    >>> norm = BatchNorm1d(input_shape=input.shape)
    >>> output = norm(input)
    >>> output.shape
    torch.Size([100, 10])
    Nh㈵>皙?TFc	           	         sV   t    || _|| _|d u r|r|d }n|d u r|d }tj|||||d| _d S )N   epsmomentumaffinetrack_running_stats)super__init__combine_batch_timeskip_transposennr   norm)	selfinput_shape
input_sizer   r	   r
   r   r   r   	__class__ R/home/ubuntu/.local/lib/python3.10/site-packages/speechbrain/nnet/normalization.pyr   /   s   

zBatchNorm1d.__init__c                 C   s   |j }| jr,|jdkr||d |d  |d }n||d |d  |d |d }n	| js5|dd}| |}| jrD||}|S | jsM|dd}|S )ag  Returns the normalized input tensor.

        Arguments
        ---------
        x : torch.Tensor (batch, time, [channels])
            input to normalize. 2d or 3d tensors are expected in input
            4d tensors can be used when combine_dims=True.

        Returns
        -------
        x_n : torch.Tensor
            The normalized outputs.
           r   r      r   )shaper   ndimreshaper   	transposer   )r   xshape_orx_nr   r   r   forwardK   s    


zBatchNorm1d.forward)NNr   r   TTFF__name__
__module____qualname____doc__r   r"   __classcell__r   r   r   r   r      s    #r   c                       s6   e Zd ZdZ						d
 fdd	Zdd	 Z  ZS )BatchNorm2da  Applies 2d batch normalization to the input tensor.

    Arguments
    ---------
    input_shape : tuple
        The expected shape of the input. Alternatively, use ``input_size``.
    input_size : int
        The expected size of the input. Alternatively, use ``input_shape``.
    eps : float
        This value is added to std deviation estimation to improve the numerical
        stability.
    momentum : float
        It is a value used for the running_mean and running_var computation.
    affine : bool
        When set to True, the affine parameters are learned.
    track_running_stats : bool
        When set to True, this module tracks the running mean and variance,
        and when set to False, this module does not track such statistics.

    Example
    -------
    >>> input = torch.randn(100, 10, 5, 20)
    >>> norm = BatchNorm2d(input_shape=input.shape)
    >>> output = norm(input)
    >>> output.shape
    torch.Size([100, 10, 5, 20])
    Nr   r   Tc                    L   t    |d u r|d u rtd|d u r|d }tj|||||d| _d S )N+Expected input_shape or input_size as inputr   r   )r   r   
ValueErrorr   r)   r   )r   r   r   r   r	   r
   r   r   r   r   r         
	zBatchNorm2d.__init__c                 C   &   | dd}| |}| dd}|S a&  Returns the normalized input tensor.

        Arguments
        ---------
        x : torch.Tensor (batch, time, channel1, channel2)
            input to normalize. 4d tensors are expected.

        Returns
        -------
        x_n : torch.Tensor
            The normalized outputs.
        r   r   r   r   r   r   r!   r   r   r   r"         
zBatchNorm2d.forward)NNr   r   TTr#   r   r   r   r   r)   o   s    r)   c                       s2   e Zd ZdZ				d	 fdd	Zdd Z  ZS )
	LayerNorma  Applies layer normalization to the input tensor.

    Arguments
    ---------
    input_size : int
        The expected size of the dimension to be normalized.
    input_shape : tuple
        The expected shape of the input.
    eps : float
        This value is added to std deviation estimation to improve the numerical
        stability.
    elementwise_affine : bool
        If True, this module has learnable per-element affine parameters
        initialized to ones (for weights) and zeros (for biases).

    Example
    -------
    >>> input = torch.randn(100, 101, 128)
    >>> norm = LayerNorm(input_shape=input.shape)
    >>> output = norm(input)
    >>> output.shape
    torch.Size([100, 101, 128])
    Nr   Tc                    sF   t    || _|| _|d ur|dd  }tjj|| j| jd| _d S )Nr   )r   elementwise_affine)r   r   r   r4   torchr   r3   r   )r   r   r   r   r4   r   r   r   r      s   
zLayerNorm.__init__c                 C   s
   |  |S )a  Returns the normalized input tensor.

        Arguments
        ---------
        x : torch.Tensor (batch, time, channels)
            input to normalize. 3d or 4d tensors are expected.

        Returns
        -------
        The normalized outputs.
        )r   )r   r   r   r   r   r"      s   
zLayerNorm.forward)NNr   Tr#   r   r   r   r   r3      s    r3   c                       6   e Zd ZdZ						d fdd	Zd	d
 Z  ZS )InstanceNorm1da:  Applies 1d instance normalization to the input tensor.

    Arguments
    ---------
    input_shape : tuple
        The expected shape of the input. Alternatively, use ``input_size``.
    input_size : int
        The expected size of the input. Alternatively, use ``input_shape``.
    eps : float
        This value is added to std deviation estimation to improve the numerical
        stability.
    momentum : float
        It is a value used for the running_mean and running_var computation.
    track_running_stats : bool
        When set to True, this module tracks the running mean and variance,
        and when set to False, this module does not track such statistics.
    affine : bool
        A boolean value that when set to True, this module has learnable
        affine parameters, initialized the same way as done for
        batch normalization. Default: False.

    Example
    -------
    >>> input = torch.randn(100, 10, 20)
    >>> norm = InstanceNorm1d(input_shape=input.shape)
    >>> output = norm(input)
    >>> output.shape
    torch.Size([100, 10, 20])
    Nr   r   TFc                    r*   Nr+   r   )r   r	   r   r
   )r   r   r,   r   r7   r   r   r   r   r   r	   r   r
   r   r   r   r     r-   zInstanceNorm1d.__init__c                 C   r.   )a  Returns the normalized input tensor.

        Arguments
        ---------
        x : torch.Tensor (batch, time, channels)
            input to normalize. 3d tensors are expected.

        Returns
        -------
        x_n : torch.Tensor
            The normalized outputs.
        r   r   r0   r1   r   r   r   r"   -  r2   zInstanceNorm1d.forwardNNr   r   TFr#   r   r   r   r   r7           r7   c                       r6   )InstanceNorm2da@  Applies 2d instance normalization to the input tensor.

    Arguments
    ---------
    input_shape : tuple
        The expected shape of the input. Alternatively, use ``input_size``.
    input_size : int
        The expected size of the input. Alternatively, use ``input_shape``.
    eps : float
        This value is added to std deviation estimation to improve the numerical
        stability.
    momentum : float
        It is a value used for the running_mean and running_var computation.
    track_running_stats : bool
        When set to True, this module tracks the running mean and variance,
        and when set to False, this module does not track such statistics.
    affine : bool
        A boolean value that when set to True, this module has learnable
        affine parameters, initialized the same way as done for
        batch normalization. Default: False.

    Example
    -------
    >>> input = torch.randn(100, 10, 20, 2)
    >>> norm = InstanceNorm2d(input_shape=input.shape)
    >>> output = norm(input)
    >>> output.shape
    torch.Size([100, 10, 20, 2])
    Nr   r   TFc                    r*   r8   )r   r   r,   r   r<   r   r9   r   r   r   r   `  r-   zInstanceNorm2d.__init__c                 C   r.   r/   r0   r1   r   r   r   r"   y  r2   zInstanceNorm2d.forwardr:   r#   r   r   r   r   r<   A  r;   r<   c                       s4   e Zd ZdZ					d	 fdd	Zdd Z  ZS )
	GroupNormab  Applies group normalization to the input tensor.

    Arguments
    ---------
    input_shape : tuple
        The expected shape of the input. Alternatively, use ``input_size``.
    input_size : int
        The expected size of the input. Alternatively, use ``input_shape``.
    num_groups : int
        Number of groups to separate the channels into.
    eps : float
        This value is added to std deviation estimation to improve the numerical
        stability.
    affine : bool
        A boolean value that when set to True, this module has learnable per-channel
        affine parameters initialized to ones (for weights) and zeros (for biases).

    Example
    -------
    >>> input = torch.randn(100, 101, 128)
    >>> norm = GroupNorm(input_size=128, num_groups=128)
    >>> output = norm(input)
    >>> output.shape
    torch.Size([100, 101, 128])
    Nr   Tc                    sl   t    || _|| _|d u r|d u rtd|d u rtd|d ur'|d }tjj||| j| jd| _d S )Nr+   zExpected num_groups as inputr   )r   r
   )	r   r   r   r
   r,   r5   r   r=   r   )r   r   r   
num_groupsr   r
   r   r   r   r     s   
zGroupNorm.__init__c                 C   r.   )a"  Returns the normalized input tensor.

        Arguments
        ---------
        x : torch.Tensor (batch, time, channels)
            input to normalize. 3d or 4d tensors are expected.

        Returns
        -------
        x_n : torch.Tensor
            The normalized outputs.
        r   r   r0   r1   r   r   r   r"     r2   zGroupNorm.forward)NNNr   Tr#   r   r   r   r   r=     s    r=   c                       sH   e Zd ZdZ				ddedededed	ef
 fd
dZdd Z  Z	S )ExponentialMovingAveragea6  
    Applies learnable exponential moving average, as required by learnable PCEN layer

    Arguments
    ---------
    input_size : int
        The expected size of the input.
    coeff_init: float
        Initial smoothing coefficient value
    per_channel: bool
        Controls whether every smoothing coefficients are learned
        independently for every input channel
    trainable: bool
        whether to learn the PCEN parameters or use fixed
    skip_transpose : bool
        If False, uses batch x time x channel convention of speechbrain.
        If True, uses batch x channel x time convention.

    Example
    -------
    >>> inp_tensor = torch.rand([10, 50, 40])
    >>> pcen = ExponentialMovingAverage(40)
    >>> out_tensor = pcen(inp_tensor)
    >>> out_tensor.shape
    torch.Size([10, 50, 40])
    {Gz?FTr   
coeff_initper_channel	trainabler   c                    sV   t    || _|| _|| _|| _| jrt|ntd}tj	|| j |d| _
d S )Nr   requires_grad)r   r   _coeff_init_per_channelr   rC   r5   onesr   	Parameter_weights)r   r   rA   rB   rC   r   weightsr   r   r   r     s    
	
z!ExponentialMovingAverage.__init__c                 C   sd   | j s	|dd}tj| jddd}|dddddf }dd	 }||||}| j s0|dd}|S )
zReturns the normalized input tensor.

        Arguments
         ---------
         x : torch.Tensor (batch, time, channels)
             input to normalize.
        r   r   g              ?)minmaxNr   c                 S   sr   | ddd}| }g }t|jd D ]}|||  d| |  }||d qtj|dd}| ddd}|S )zLoops and accumulates.r   r   r   rL   )dim)permuteranger   append	unsqueezer5   cat)
init_stater   waccresultsixr   r   r   scan  s   z.ExponentialMovingAverage.forward.<locals>.scan)r   r   r5   clamprJ   )r   r   rV   initial_staterZ   outputr   r   r   r"     s   z ExponentialMovingAverage.forward)r@   FTF)
r$   r%   r&   r'   intfloatboolr   r"   r(   r   r   r   r   r?     s$    r?   c                       s\   e Zd ZdZ								dded	ed
edededededef fddZdd Z  ZS )PCENa  
    This class implements a learnable Per-channel energy normalization (PCEN) layer, supporting both
    original PCEN as specified in [1] as well as sPCEN as specified in [2]

    [1] Yuxuan Wang, Pascal Getreuer, Thad Hughes, Richard F. Lyon, Rif A. Saurous, "Trainable Frontend For
    Robust and Far-Field Keyword Spotting", in Proc of ICASSP 2017 (https://arxiv.org/abs/1607.05666)

    [2] Neil Zeghidour, Olivier Teboul, F{'e}lix de Chaumont Quitry & Marco Tagliasacchi, "LEAF: A LEARNABLE FRONTEND
    FOR AUDIO CLASSIFICATION", in Proc of ICLR 2021 (https://arxiv.org/abs/2101.08596)

    The default argument values correspond with those used by [2].

    Arguments
    ---------
    input_size : int
        The expected size of the input.
    alpha: float
        specifies alpha coefficient for PCEN
    smooth_coef: float
        specified smooth coefficient for PCEN
    delta: float
        specifies delta coefficient for PCEN
    root: float
        specifies root coefficient for PCEN
    floor: float
        specifies floor coefficient for PCEN
    trainable: bool
        whether to learn the PCEN parameters or use fixed
    per_channel_smooth_coef: bool
        whether to learn independent smooth coefficients for every channel.
        when True, essentially using sPCEN from [2]
    skip_transpose : bool
        If False, uses batch x time x channel convention of speechbrain.
        If True, uses batch x channel x time convention.

    Example
    -------
    >>> inp_tensor = torch.rand([10, 50, 40])
    >>> pcen = PCEN(40, alpha=0.96)         # sPCEN
    >>> out_tensor = pcen(inp_tensor)
    >>> out_tensor.shape
    torch.Size([10, 50, 40])
    Q?r@          @-q=TFalphasmooth_coefdeltarootfloorrC   per_channel_smooth_coefr   c
           
         s   t    || _|| _|| _|	| _tjt	|| |d| _
tjt	|| |d| _tjt	|| |d| _t|| j| jd|d| _d S )NrD   T)rA   rB   r   rC   )r   r   _smooth_coef_floor_per_channel_smooth_coefr   r   rI   r5   rH   re   rg   rh   r?   ema)
r   r   re   rf   rg   rh   ri   rC   rj   r   r   r   r   r   Z  s*   
zPCEN.__init__c                 C   s   | j s	|dd}t| jtjd|j|jd}t| j	tjd|j|jd}| 
|}d| }|| j| |ddd  | jddd |ddd | jddd|ddd  }| j se|dd}|S )a  Returns the normalized input tensor.

        Arguments
        ---------
        x : torch.Tensor (batch, time, channels)
            input to normalize.

        Returns
        -------
        output : torch.Tensor
            The normalized outputs.
        r   r   rL   )dtypedevice)r   r   r5   rM   re   tensorro   rp   rN   rh   rn   rl   viewrg   )r   r   re   rh   ema_smootherone_over_rootr]   r   r   r   r"   }  s4   
zPCEN.forward)rb   r@   rc   rc   rd   TTF)	r$   r%   r&   r'   r_   r`   r   r"   r(   r   r   r   r   ra   -  s8    /	
#ra   )r'   r5   torch.nnr   Moduler   r)   r3   r7   r<   r=   r?   ra   r   r   r   r   <module>   s    bJ<LLKU