o
    eiW                     @   sh  d Z ddlZddlZddlmZ ddlmZ ddlZddl	Z	ddl
mZ ddlm  mZ ddlmZ ddlmZ ddlmZ ddlmZ eeZ			
dVddZG dd dejZ		dWddZ		dXddZ		dXddZ		dXddZ						dYddZ 							dZddZ!						d[ddZ"			 		d\d!d"Z#d#d$ Z$d]d%d&Z%			'		d^d(d)Z&d_d+d,Z'					d`d-d.Z(d/d0 Z)d1d2 Z*d3d4 Z+d5d6 Z,d7d8 Z-G d9d: d:ejZ.G d;d< d<e.Z/G d=d> d>ejZ0d?d@ Z1dAdB Z2dCdD Z3G dEdF dFejZ4G dGdH dHejZ5G dIdJ dJejZ6dKdL Z7edMg dNZ8edOdPdQgZ9G dRdS dSejZ:G dTdU dUejZ;dS )az
Losses for training neural networks.

Authors
 * Mirco Ravanelli 2020
 * Samuele Cornell 2020
 * Hwidong Na 2020
 * Yan Gao 2020
 * Titouan Parcollet 2020
    N)
namedtuple)permutations)length_to_mask)filter_ctc_output)unsqueeze_as)
get_loggermeanTc                 C   s   || j d    }||j d    }|rJzddlm} W n ty=   d}|d7 }|d7 }|d7 }|d7 }t|w || | ||||d	S dd
lm}	 | d}
|		|
|||||S )a?  Transducer loss, see `speechbrain/nnet/loss/transducer_loss.py`.

    Arguments
    ---------
    logits : torch.Tensor
        Predicted tensor, of shape [batch, maxT, maxU, num_labels].
    targets : torch.Tensor
        Target tensor, without any blanks, of shape [batch, target_len].
    input_lens : torch.Tensor
        Length of each utterance.
    target_lens : torch.Tensor
        Length of each target sequence.
    blank_index : int
        The location of the blank symbol among the label indices.
    reduction : str
        Specifies the reduction to apply to the output: 'mean' | 'batchmean' | 'sum'.
    use_torchaudio: bool
        If True, use Transducer loss implementation from torchaudio, otherwise,
        use Speechbrain Numba implementation.

    Returns
    -------
    The computed transducer loss.
       r   )	rnnt_losszEThe dependency torchaudio >= 0.10.0 is needed to use Transducer Loss
z/Cannot import torchaudio.functional.rnnt_loss.
z/To use it, please install torchaudio >= 0.10.0
z==================
zMOtherwise, you can use our numba implementation, set `use_torchaudio=False`.
)blank	reduction)
Transducer)
shaperoundinttorchaudio.functionalr
   ImportError%speechbrain.nnet.loss.transducer_lossr   log_softmaxapply)logitstargets
input_lenstarget_lensblank_indexr   use_torchaudior
   err_msgr   	log_probs r   U/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/speechbrain/nnet/losses.pytransducer_loss   s4   !	
r!   c                       s@   e Zd ZdZ fddZdd Zdd Zdd	 Zd
d Z  Z	S )
PitWrappera  
    Permutation Invariant Wrapper to allow Permutation Invariant Training
    (PIT) with existing losses.

    Permutation invariance is calculated over the sources/classes axis which is
    assumed to be the rightmost dimension: predictions and targets tensors are
    assumed to have shape [batch, ..., channels, sources].

    Arguments
    ---------
    base_loss : function
        Base loss function, e.g. torch.nn.MSELoss. It is assumed that it takes
        two arguments:
        predictions and targets and no reduction is performed.
        (if a pytorch loss is used, the user must specify reduction="none").

    Example
    -------
    >>> pit_mse = PitWrapper(nn.MSELoss(reduction="none"))
    >>> targets = torch.rand((2, 32, 4))
    >>> p = (3, 0, 2, 1)
    >>> predictions = targets[..., p]
    >>> loss, opt_p = pit_mse(predictions, targets)
    >>> loss
    tensor([0., 0.])
    c                       t    || _d S N)super__init__	base_loss)selfr'   	__class__r   r    r&   {      

zPitWrapper.__init__c                 C   sZ   d}d}t t|jd D ]}|t|jd |f  }|du s$||kr(|}|}q||fS )a  
        Arguments
        ---------
        loss_mat : torch.Tensor
            Tensor of shape [sources, source] containing loss values for each
            possible permutation of predictions.

        Returns
        -------
        loss : torch.Tensor
            Permutation invariant loss for the current batch, tensor of shape [1]
        assigned_perm : tuple
            Indexes for optimal permutation of the input over sources which
            minimizes the loss.
        Nr   )r   ranger   r   )r(   loss_matlossassigned_permpc_lossr   r   r    	_fast_pit   s   zPitWrapper._fast_pitc                 C   s   | d}|djg dd tt|jd D |dR  }|djdgdd tt|jd D |R  }| ||}t|jdksKJ dd	d tt|jD }|j|d
d d}| |S )a2  
        Arguments
        ---------
        pred : torch.Tensor
            Network prediction for the current example, tensor of
            shape [..., sources].
        target : torch.Tensor
            Target for the current example, tensor of shape [..., sources].

        Returns
        -------
        loss : torch.Tensor
            Permutation invariant loss for the current example, tensor of shape [1]
        assigned_perm : tuple
            Indexes for optimal permutation of the input over sources which
            minimizes the loss.
        r   c                 S      g | ]}d qS r	   r   .0xr   r   r    
<listcomp>       z-PitWrapper._opt_perm_loss.<locals>.<listcomp>r	   c                 S   r4   r5   r   r6   r   r   r    r9      r:      z4Base loss should not perform any reduction operationc                 S   s   g | ]}|qS r   r   r6   r   r   r    r9      r:   Ndim)	size	unsqueezerepeatr,   lenr   r'   r   r2   )r(   predtarget	n_sourcesr-   	mean_overr   r   r    _opt_perm_loss   s,   


zPitWrapper._opt_perm_lossc                 C   sD   t j||jd}t|jd D ]}|| d|| f  ||< q|S )a  
        Arguments
        ---------
        tensor : torch.Tensor
            torch.Tensor to reorder given the optimal permutation, of shape
            [batch, ..., sources].
        p : list of tuples
            List of optimal permutations, e.g. for batch=2 and n_sources=3
            [(0, 1, 2), (0, 2, 1].

        Returns
        -------
        reordered : torch.Tensor
            Reordered tensor given permutation p.
        devicer   .)torch
zeros_likerH   r,   r   clone)r(   tensorr0   	reorderedbr   r   r    reorder_tensor   s   zPitWrapper.reorder_tensorc           	      C   sR   g }g }t ||D ]\}}| ||\}}|| || q	t|}||fS )a|  
        Arguments
        ---------
        preds : torch.Tensor
            Network predictions tensor, of shape
            [batch, channels, ..., sources].
        targets : torch.Tensor
            Target tensor, of shape [batch, channels, ..., sources].

        Returns
        -------
        loss : torch.Tensor
            Permutation invariant loss for current examples, tensor of
            shape [batch]
        perms : list
            List of indexes for optimal permutation of the inputs over
            sources.
            e.g., [(0, 1, 2), (2, 1, 0)] for three sources and 2 examples
            per batch.
        )ziprF   appendrI   stack)	r(   predsr   lossespermsrB   labelr.   r0   r   r   r    forward   s   

zPitWrapper.forward)
__name__
__module____qualname____doc__r&   r2   rF   rO   rW   __classcell__r   r   r)   r    r"   _   s    %r"   c           	   	   C   s   || j d    }||j d    }| dd} |dkr#d}n	|dkr*d}n|}tjjj| ||||d|d}|dkrE||j d  S |dkr`|d}|	|d	
d|	|d	
d S |S )
a  CTC loss.

    Arguments
    ---------
    log_probs : torch.Tensor
        Predicted tensor, of shape [batch, time, chars].
    targets : torch.Tensor
        Target tensor, without any blanks, of shape [batch, target_len]
    input_lens : torch.Tensor
        Length of each utterance.
    target_lens : torch.Tensor
        Length of each target sequence.
    blank_index : int
        The location of the blank symbol among the character indexes.
    reduction : str
        What reduction to apply to the output. 'mean', 'sum', 'batch',
        'batchmean', 'none'.
        See pytorch for 'mean', 'sum', 'none'. The 'batch' option returns
        one loss per item in the batch, 'batchmean' returns sum / batch size.

    Returns
    -------
    The computed CTC loss.
    r	   r   	batchmeansumbatchnoneT)zero_infinityr   r   )r   r   r   	transposerI   nn
functionalctc_lossr>   viewr^   )	r   r   r   r   r   r   reduction_lossr.   Nr   r   r    re      s.   

$re      c                 C   6   t | ||\} }tjtjjjdd}t|| |||dS )a  Compute the true l1 loss, accounting for length differences.

    Arguments
    ---------
    predictions : torch.Tensor
        Predicted tensor, of shape ``[batch, time, *]``.
    targets : torch.Tensor
        Target tensor with the same size as predicted tensor.
    length : torch.Tensor
        Length of each utterance for computing true error with a mask.
    allowed_len_diff : int
        Length difference that will be tolerated before raising an exception.
    reduction : str
        Options are 'mean', 'batch', 'batchmean', 'sum'.
        See pytorch for 'mean', 'sum'. The 'batch' option returns
        one loss per item in the batch, 'batchmean' returns sum / batch size.

    Returns
    -------
    The computed L1 loss.

    Example
    -------
    >>> probs = torch.tensor([[0.9, 0.1, 0.1, 0.9]])
    >>> l1_loss(probs, torch.tensor([[1., 0., 0., 1.]]))
    tensor(0.1000)
    r`   r   )truncate	functoolspartialrI   rc   rd   l1_losscompute_masked_losspredictionsr   lengthallowed_len_diffr   r.   r   r   r    ro   +  
   
ro   c                 C   rj   )a  Compute the true mean squared error, accounting for length differences.

    Arguments
    ---------
    predictions : torch.Tensor
        Predicted tensor, of shape ``[batch, time, *]``.
    targets : torch.Tensor
        Target tensor with the same size as predicted tensor.
    length : torch.Tensor
        Length of each utterance for computing true error with a mask.
    allowed_len_diff : int
        Length difference that will be tolerated before raising an exception.
    reduction : str
        Options are 'mean', 'batch', 'batchmean', 'sum'.
        See pytorch for 'mean', 'sum'. The 'batch' option returns
        one loss per item in the batch, 'batchmean' returns sum / batch size.

    Returns
    -------
    The computed MSE loss.

    Example
    -------
    >>> probs = torch.tensor([[0.9, 0.1, 0.1, 0.9]])
    >>> mse_loss(probs, torch.tensor([[1., 0., 0., 1.]]))
    tensor(0.0100)
    r`   rk   )rl   rm   rn   rI   rc   rd   mse_lossrp   rq   r   r   r    rv   P  ru   rv   c                    sN   t  jdkrt |jdkrt ||\ } fdd}t| | ||dS )a  Computes the classification error at frame or batch level.

    Arguments
    ---------
    probabilities : torch.Tensor
        The posterior probabilities of shape
        [batch, prob] or [batch, frames, prob]
    targets : torch.Tensor
        The targets, of shape [batch] or [batch, frames]
    length : torch.Tensor
        Length of each utterance, if frame-level loss is desired.
    allowed_len_diff : int
        Length difference that will be tolerated before raising an exception.
    reduction : str
        Options are 'mean', 'batch', 'batchmean', 'sum'.
        See pytorch for 'mean', 'sum'. The 'batch' option returns
        one loss per item in the batch, 'batchmean' returns sum / batch size.

    Returns
    -------
    The computed classification error.

    Example
    -------
    >>> probs = torch.tensor([[[0.9, 0.1], [0.1, 0.9]]])
    >>> classification_error(probs, torch.tensor([1, 1]))
    tensor(0.5000)
    ri   r;   c                    s   t j dd} | |k S )z"Computes the classification error.r   r<   )rI   argmaxfloat)rr   r   probabilitiesr   r    error  s   z#classification_error.<locals>.errorrk   )rA   r   rl   rp   long)rz   r   rs   rt   r   r{   r   ry   r    classification_erroru  s   r}           c                 C   sX   t | jdkrt| ||\} }| dd} tjtjjj	|dd}t
|| | |||dS )a  Computes negative log likelihood loss.

    Arguments
    ---------
    log_probabilities : torch.Tensor
        The probabilities after log has been applied.
        Format is [batch, log_p] or [batch, frames, log_p].
    targets : torch.Tensor
        The targets, of shape [batch] or [batch, frames].
    length : torch.Tensor
        Length of each utterance, if frame-level loss is desired.
    label_smoothing : float
        The amount of smoothing to apply to labels (default 0.0, no smoothing)
    allowed_len_diff : int
        Length difference that will be tolerated before raising an exception.
    weight: torch.Tensor
        A manual rescaling weight given to each class.
        If given, has to be a Tensor of size C.
    reduction : str
        Options are 'mean', 'batch', 'batchmean', 'sum'.
        See pytorch for 'mean', 'sum'. The 'batch' option returns
        one loss per item in the batch, 'batchmean' returns sum / batch size.

    Returns
    -------
    The computed NLL loss.

    Example
    -------
    >>> probs = torch.tensor([[0.9, 0.1], [0.1, 0.9]])
    >>> nll_loss(torch.log(probs), torch.tensor([1, 1]))
    tensor(1.2040)
    ri   r	   r   r`   )weightr   label_smoothingr   )rA   r   rl   rb   rm   rn   rI   rc   rd   nll_lossrp   r|   )log_probabilitiesr   rs   r   rt   r   r   r.   r   r   r    r     s    *r   c           	      C   s   t | jt |jd kr| d} t | jdkr!t| ||\} }n|dur)td| d|d} }|durC| dkrC|d}tjt	j
jj||dd}t|| | |||dS )	u  Computes binary cross-entropy (BCE) loss. It also applies the sigmoid
    function directly (this improves the numerical stability).

    Arguments
    ---------
    inputs : torch.Tensor
        The output before applying the final softmax
        Format is [batch[, 1]?] or [batch, frames[, 1]?].
        (Works with or without a singleton dimension at the end).
    targets : torch.Tensor
        The targets, of shape [batch] or [batch, frames].
    length : torch.Tensor
        Length of each utterance, if frame-level loss is desired.
    weight : torch.Tensor
        A manual rescaling weight if provided it’s repeated to match input
        tensor shape.
    pos_weight : torch.Tensor
        A weight of positive examples. Must be a vector with length equal to
        the number of classes.
    reduction: str
        Options are 'mean', 'batch', 'batchmean', 'sum'.
        See pytorch for 'mean', 'sum'. The 'batch' option returns
        one loss per item in the batch, 'batchmean' returns sum / batch size.
    allowed_len_diff : int
        Length difference that will be tolerated before raising an exception.
    label_smoothing : float
        The amount of smoothing to apply to labels (default 0.0, no smoothing)

    Returns
    -------
    The computed BCE loss.

    Example
    -------
    >>> inputs = torch.tensor([10.0, -6.0])
    >>> targets = torch.tensor([1, 0])
    >>> bce_loss(inputs, targets)
    tensor(0.0013)
    r	   r   r;   Nz+length can be passed only for >= 2D inputs.r`   )r   
pos_weightr   r   )rA   r   squeezerl   
ValueErrorr?   r=   rm   rn   rI   rc   rd    binary_cross_entropy_with_logitsrp   rx   )	inputsr   rs   r   r   r   rt   r   r.   r   r   r    bce_loss  s.   2

r   c                 C   sN  |dkr|   dkr| d} | j\}}}	|  }d| }
| d|	} |d}t ) |  }|	||	d   ||k}|
|d}|d|d|
 W d   n1 sZw   Y  tjjj| |dd}|
|dd}|dkr||  S |d	kr| | S |d
kr||dd| S |dkr| S |S t| |||dS )a  Computes the KL-divergence error at the batch level.
    This loss applies label smoothing directly to the targets

    Arguments
    ---------
    log_probabilities : torch.Tensor
        The posterior probabilities of shape
        [batch, prob] or [batch, frames, prob].
    targets : torch.Tensor
        The targets, of shape [batch] or [batch, frames].
    length : torch.Tensor
        Length of each utterance, if frame-level loss is desired.
    label_smoothing : float
        The amount of smoothing to apply to labels (default 0.0, no smoothing)
    allowed_len_diff : int
        Length difference that will be tolerated before raising an exception.
    pad_idx : int
        Entries of this value are considered padding.
    reduction : str
        Options are 'mean', 'batch', 'batchmean', 'sum'.
        See pytorch for 'mean', 'sum'. The 'batch' option returns
        one loss per item in the batch, 'batchmean' returns sum / batch size.

    Returns
    -------
    The computed kldiv loss.

    Example
    -------
    >>> probs = torch.tensor([[0.9, 0.1], [0.1, 0.9]])
    >>> kldiv_loss(torch.log(probs), torch.tensor([1, 1]))
    tensor(1.2040)
    r   r;   r	   r   Nr`   rk   r   r]   r_   r^   )r=   r?   r   r|   detachrf   rI   no_gradrK   fill_masked_fillscatter_rc   rd   kl_divr^   r   r   )r   r   rs   r   rt   pad_idxr   bztimen_class
confidencetrue_distributionignorer.   r   r   r    
kldiv_loss5  s:   *


r         ?      Y@c                 C   s    t tjt||d| |||ddS )a  A loss function that can be used in cases where a model outputs
    an arbitrary probability distribution for a discrete variable on
    an interval scale, such as the length of a sequence, and the ground
    truth is the precise values of the variable from a data sample.

    The loss is defined as
    loss_i = p_i * exp(beta * |i - y|) - 1.

    The loss can also be used where outputs aren't probabilities, so long
    as high values close to the ground truth position and low values away
    from it are desired

    Arguments
    ---------
    predictions: torch.Tensor
        a (batch x max_len) tensor in which each element is a probability,
        weight or some other value at that position
    targets: torch.Tensor
        a 1-D tensor in which each element is thr ground truth
    length: torch.Tensor
        lengths (for masking in padded batches)
    beta: torch.Tensor
        a hyperparameter controlling the penalties. With a higher beta,
        penalties will increase faster
    max_weight: torch.Tensor
        the maximum distance weight (for numerical stability in long sequences)
    reduction: str
        Options are 'mean', 'batch', 'batchmean', 'sum'.
        See pytorch for 'mean', 'sum'. The 'batch' option returns
        one loss per item in the batch, 'batchmean' returns sum / batch size

    Returns
    -------
    The masked loss.

    Example
    -------
    >>> predictions = torch.tensor(
    ...    [[0.25, 0.5, 0.25, 0.0],
    ...     [0.05, 0.05, 0.9, 0.0],
    ...     [8.0, 0.10, 0.05, 0.05]]
    ... )
    >>> targets = torch.tensor([2., 3., 1.])
    >>> length = torch.tensor([.75, .75, 1.])
    >>> loss = distance_diff_loss(predictions, targets, length)
    >>> loss
    tensor(0.2967)
    )beta
max_weightr.   )rr   r   rs   r   
mask_shape)rp   rm   rn   _distance_diff_loss)rr   r   rs   r   r   r   r   r   r    distance_diff_loss  s   8r   c           	      C   sb   | j \}}t|d|d| j}||d  }||  d j	|d}||  dS )at  Computes the raw (unreduced) distance difference loss

    Arguments
    ---------
    predictions: torch.Tensor
        a (batch x max_len) tensor in which each element is a probability,
        weight or some other value at that position
    targets: torch.Tensor
        a 1-D tensor in which each element is thr ground truth
    beta: torch.Tensor
        a hyperparameter controlling the penalties. With a higher beta,
        penalties will increase faster
    max_weight: torch.Tensor
        the maximum distance weight (for numerical stability in long sequences)

    Returns
    -------
    The raw distance loss.
    r   r	   r         ?)max)
r   rI   aranger?   r@   torH   absexpclamp)	rr   r   r   r   
batch_sizemax_len	pos_range
diff_rangeloss_weightsr   r   r    r     s   
r   c                 C   s   | j d |j d  }|dkr| |fS t||kr&td| j d |j d f |dk r9| |ddd| j d f fS | ddd|j d f |fS )a  Ensure that predictions and targets are the same length.

    Arguments
    ---------
    predictions : torch.Tensor
        First tensor for checking length.
    targets : torch.Tensor
        Second tensor for checking length.
    allowed_len_diff : int
        Length difference that will be tolerated before raising an exception.

    Returns
    -------
    predictions : torch.Tensor
    targets : torch.Tensor
        Same as inputs, but with the same shape.
    r	   r   zNPredictions and targets should be same length, but got %s and %s respectively.N)r   r   r   )rr   r   rt   len_diffr   r   r    rl     s   rl   r   c           
      C   sf   | ||}|dkr|}n|dkr|}n|dkr|}nt d| t||}	||	9 }t||	||||S )a  Compute the true average loss of a set of waveforms of unequal length.

    Arguments
    ---------
    loss_fn : function
        A function for computing the loss taking just predictions and targets.
        Should return all the losses, not a reduction (e.g. reduction="none").
    predictions : torch.Tensor
        First argument to loss function.
    targets : torch.Tensor
        Second argument to loss function.
    length : torch.Tensor
        Length of each utterance to compute mask. If None, global average is
        computed and returned.
    label_smoothing: float
        The proportion of label smoothing. Should only be used for NLL loss.
        Ref: Regularizing Neural Networks by Penalizing Confident Output
        Distributions. https://arxiv.org/abs/1701.06548
    mask_shape: torch.Tensor
        the shape of the mask
        The default is "targets", which will cause the mask to be the same
        shape as the targets

        Other options include "predictions" and "loss", which will use the
        shape of the predictions and the unreduced loss, respectively.
        These are useful for loss functions that whose output does not
        match the shape of the targets
    reduction : str
        One of 'mean', 'batch', 'batchmean', 'none' where 'mean' returns a
        single value and 'batch' returns one per item in the batch and
        'batchmean' is sum / batch_size and 'none' returns all.

    Returns
    -------
    The masked loss.
    r   rr   r.   zInvalid mask_shape value )r   compute_length_maskreduce_loss)
loss_fnrr   r   rs   r   r   r   r.   	mask_datamaskr   r   r    rp     s   
/
rp   r	   c                 C   s   t | }|dur>t|| j|  | j| d}t|jt|jk r0|d}t|jt|jk s!||jd|}||9 }|S )a  Computes a length mask for the specified data shape

    Arguments
    ---------
    data: torch.Tensor
        the data shape
    length: torch.Tensor
        the length of the corresponding data samples
    len_dim: int
        the length dimension (defaults to 1)

    Returns
    -------
    mask: torch.Tensor
        the mask

    Example
    -------
    >>> data = torch.arange(5)[None, :, None].repeat(3, 1, 2)
    >>> data += torch.arange(1, 4)[:, None, None]
    >>> data *= torch.arange(1, 3)[None, None, :]
    >>> data
    tensor([[[ 1,  2],
             [ 2,  4],
             [ 3,  6],
             [ 4,  8],
             [ 5, 10]],
    <BLANKLINE>
            [[ 2,  4],
             [ 3,  6],
             [ 4,  8],
             [ 5, 10],
             [ 6, 12]],
    <BLANKLINE>
            [[ 3,  6],
             [ 4,  8],
             [ 5, 10],
             [ 6, 12],
             [ 7, 14]]])
    >>> compute_length_mask(data, torch.tensor([1., .4, .8]))
    tensor([[[1, 1],
             [1, 1],
             [1, 1],
             [1, 1],
             [1, 1]],
    <BLANKLINE>
            [[1, 1],
             [1, 1],
             [0, 0],
             [0, 0],
             [0, 0]],
    <BLANKLINE>
            [[1, 1],
             [1, 1],
             [1, 1],
             [1, 1],
             [0, 0]]])
    >>> compute_length_mask(data, torch.tensor([.5, 1., .5]), len_dim=2)
    tensor([[[1, 0],
             [1, 0],
             [1, 0],
             [1, 0],
             [1, 0]],
    <BLANKLINE>
            [[1, 1],
             [1, 1],
             [1, 1],
             [1, 1],
             [1, 1]],
    <BLANKLINE>
            [[1, 0],
             [1, 0],
             [1, 0],
             [1, 0],
             [1, 0]]])
    N)r   r   r	   )	rI   	ones_liker   r   rA   r?   typedtyperb   )datars   len_dimr   length_maskr   r   r    r   H  s   
M
r   c                 C   s   |  d}|dkr|  t| } n!|dkr|  | } n|dkr4| |dd||dd } |dkr:| S tj|dd| }|dkrRt|t| }n|dkrat||jd  }n|dkro|d|d }| | d| |   S )a*  Performs the specified reduction of the raw loss value

    Arguments
    ---------
    loss : function
        A function for computing the loss taking just predictions and targets.
        Should return all the losses, not a reduction (e.g. reduction="none").
    mask : torch.Tensor
        Mask to apply before computing loss.
    reduction : str
        One of 'mean', 'batch', 'batchmean', 'none' where 'mean' returns a
        single value and 'batch' returns one per item in the batch and
        'batchmean' is sum / batch_size and 'none' returns all.
    label_smoothing: float
        The proportion of label smoothing. Should only be used for NLL loss.
        Ref: Regularizing Neural Networks by Penalizing Confident Output
        Distributions. https://arxiv.org/abs/1701.06548
    predictions : torch.Tensor
        First argument to loss function. Required only if label smoothing is used.
    targets : torch.Tensor
        Second argument to loss function. Required only if label smoothing is used.

    Returns
    -------
    Reduced loss.
    r   r   r]   r_   r   r	   r<   )r>   r^   rI   reshaper   r   )r.   r   r   r   rr   r   rh   loss_regr   r   r    r     s"   
"$r   c                 C      t t}|| |\}}|S )a  This function wraps si_snr calculation with the speechbrain pit-wrapper.

    Arguments
    ---------
    source: torch.Tensor
        Shape is [B, T, C],
        Where B is the batch size, T is the length of the sources, C is
        the number of sources the ordering is made so that this loss is
        compatible with the class PitWrapper.
    estimate_source: torch.Tensor
        The estimated source, of shape [B, T, C]

    Returns
    -------
    loss: torch.Tensor
        The computed SNR

    Example
    -------
    >>> x = torch.arange(600).reshape(3, 100, 2)
    >>> xhat = x[:, :, (1, 0)]
    >>> si_snr = -get_si_snr_with_pitwrapper(x, xhat)
    >>> print(si_snr)
    tensor([135.2284, 135.2284, 135.2284])
    )r"   
cal_si_snr)sourceestimate_source
pit_si_snrr.   rU   r   r   r    get_si_snr_with_pitwrapper  s   r   c                 C   r   )a  This function wraps snr calculation with the speechbrain pit-wrapper.

    Arguments
    ---------
    source: torch.Tensor
        Shape is [B, T, E, C],
        Where B is the batch size, T is the length of the sources, E is binaural channels, C is the number of sources
        the ordering is made so that this loss is compatible with the class PitWrapper.
    estimate_source: torch.Tensor
        The estimated source, of shape [B, T, E, C]

    Returns
    -------
    loss: torch.Tensor
        The computed SNR
    )r"   cal_snr)r   r   pit_snrr.   rU   r   r   r    get_snr_with_pitwrapper  s   r   c                 C   s>  d}|   |  ksJ |jj}tj|jd g|jd  |d}t| |}||9 }| ddd	 }tj
| ddd| }tj
|ddd| }| | }	|| }
|	|9 }	|
|9 }
|	}|
}tj
|| ddd}tj
|d	 ddd| }|| | }|| }tj
|d	 dd
tj
|d	 dd
|  }dt||  }|d S )a  Calculate SI-SNR.

    Arguments
    ---------
    source: torch.Tensor
        Shape is [T, B, C],
        Where B is batch size, T is the length of the sources, C is the number of sources
        the ordering is made so that this loss is compatible with the class PitWrapper.
    estimate_source: torch.Tensor
        The estimated source, of shape [T, B, C]

    Returns
    -------
    The calculated SI-SNR.

    Example:
    ---------
    >>> import numpy as np
    >>> x = torch.Tensor([[1, 0], [123, 45], [34, 5], [2312, 421]])
    >>> xhat = x[:, (1, 0)]
    >>> x = x.unsqueeze(-1).repeat(1, 1, 2)
    >>> xhat = xhat.unsqueeze(1).repeat(1, 2, 1)
    >>> si_snr = -cal_si_snr(x, xhat)
    >>> print(si_snr)
    tensor([[[ 25.2142, 144.1789],
             [130.9283,  25.2142]]])
    :0yE>r   r3   rG   r	   r   Tr=   keepdimr;   r<   
   r>   rH   r   rI   rL   r   get_mask
contiguousr   rx   r^   log10r?   )r   r   EPSrH   source_lengthsr   num_samplesmean_targetmean_estimatezero_mean_targetzero_mean_estimates_target
s_estimatedots_target_energyproje_noisesi_snr_beforelogsi_snrr   r   r    r     s:   
r   c                 C   s  d}|   |  ksJ |jj}tj|jd g|jd  |d}t| |}||9 }| ddd	 }tj
| ddd| }tj
|ddd| }| | }	|| }
|	|9 }	|
|9 }
|	}|
}tj
|d	 dd
tj
|| d	 dd
|  }dt||  }|d S )a  Calculate binaural channel SNR.

    Arguments
    ---------
    source: torch.Tensor
        Shape is [T, E, B, C]
        Where B is batch size, T is the length of the sources, E is binaural channels, C is the number of sources
        the ordering is made so that this loss is compatible with the class PitWrapper.
    estimate_source: torch.Tensor
        The estimated source, of shape [T, E, B, C]

    Returns
    -------
    Binaural channel SNR
    r   r   r3   rG   r	   r   Tr   r;   r<   r   r   )r   r   r   rH   r   r   r   r   r   r   r   r   r   snr_beforelogsnrr   r   r    r   ]  s0   
r   c                 C   s\   |  |  dd ddd}| d}t|D ]}d||| d|f< q|ddS )a  
    Arguments
    ---------
    source : torch.Tensor
        Shape [T, B, C]
    source_lengths : torch.Tensor
        Shape [B]

    Returns
    -------
    mask : torch.Tensor
        Shape [T, B, 1]

    Example
    -------
    >>> source = torch.randn(4, 3, 2)
    >>> source_lengths = torch.Tensor([2, 1, 4]).int()
    >>> mask = get_mask(source, source_lengths)
    >>> print(mask)
    tensor([[[1.],
             [1.],
             [1.]],
    <BLANKLINE>
            [[1.],
             [0.],
             [1.]],
    <BLANKLINE>
            [[0.],
             [0.],
             [1.]],
    <BLANKLINE>
            [[0.],
             [0.],
             [1.]]])
    Nr   r	   r3   r   )new_onesr>   r?   rb   r,   )r   r   r   Bir   r   r    r     s
   $$
r   c                       s*   e Zd ZdZd fdd	Zdd Z  ZS )	AngularMargina  
    An implementation of Angular Margin (AM) proposed in the following
    paper: '''Margin Matters: Towards More Discriminative Deep Neural Network
    Embeddings for Speaker Recognition''' (https://arxiv.org/abs/1906.07317)

    Arguments
    ---------
    margin : float
        The margin for cosine similarity
    scale : float
        The scale for cosine similarity

    Example
    -------
    >>> pred = AngularMargin()
    >>> outputs = torch.tensor([ [1., -1.], [-1., 1.], [0.9, 0.1], [0.1, 0.9] ])
    >>> targets = torch.tensor([ [1., 0.], [0., 1.], [ 1., 0.], [0.,  1.] ])
    >>> predictions = pred(outputs, targets)
    >>> predictions[:,0] > predictions[:,1]
    tensor([ True, False,  True, False])
    r~   r   c                    s   t    || _|| _d S r$   )r%   r&   marginscale)r(   r   r   r)   r   r    r&     s   

zAngularMargin.__init__c                 C   s   || j |  }| j| S )a`  Compute AM between two tensors

        Arguments
        ---------
        outputs : torch.Tensor
            The outputs of shape [N, C], cosine similarity is required.
        targets : torch.Tensor
            The targets of shape [N, C], where the margin is applied for.

        Returns
        -------
        predictions : torch.Tensor
        )r   r   )r(   outputsr   r   r   r    rW     s   
zAngularMargin.forward)r~   r   rX   rY   rZ   r[   r&   rW   r\   r   r   r)   r    r     s    r   c                       s*   e Zd ZdZd	 fdd	Zdd Z  ZS )
AdditiveAngularMargina  
    An implementation of Additive Angular Margin (AAM) proposed
    in the following paper: '''Margin Matters: Towards More Discriminative Deep
    Neural Network Embeddings for Speaker Recognition'''
    (https://arxiv.org/abs/1906.07317)

    Arguments
    ---------
    margin : float
        The margin for cosine similarity.
    scale : float
        The scale for cosine similarity.
    easy_margin : bool

    Example
    -------
    >>> outputs = torch.tensor([ [1., -1.], [-1., 1.], [0.9, 0.1], [0.1, 0.9] ])
    >>> targets = torch.tensor([ [1., 0.], [0., 1.], [ 1., 0.], [0.,  1.] ])
    >>> pred = AdditiveAngularMargin()
    >>> predictions = pred(outputs, targets)
    >>> predictions[:,0] > predictions[:,1]
    tensor([ True, False,  True, False])
    r~   r   Fc                    sb   t  || || _t| j| _t| j| _ttj	| j | _
ttj	| j | j | _d S r$   )r%   r&   easy_marginmathcosr   cos_msinsin_mpithmm)r(   r   r   r   r)   r   r    r&     s   zAdditiveAngularMargin.__init__c                 C   s   |  }t|dd}tdt|d }|| j || j  }| jr-t|dk||}nt|| j	k||| j
 }|| d| |  }| j| S )aj  
        Compute AAM between two tensors

        Arguments
        ---------
        outputs : torch.Tensor
            The outputs of shape [N, C], cosine similarity is required.
        targets : torch.Tensor
            The targets of shape [N, C], where the margin is applied for.

        Returns
        -------
        predictions : torch.Tensor
        gPgP?r   r;   r   )rx   rI   r   sqrtpowr   r   r   wherer   r   r   )r(   r   r   cosinesinephir   r   r    rW     s   
zAdditiveAngularMargin.forward)r~   r   Fr   r   r   r)   r    r     s    	r   c                       s*   e Zd ZdZ fddZdddZ  ZS )LogSoftmaxWrappera=  
    Arguments
    ---------
    loss_fn : Callable
        The LogSoftmax function to wrap.

    Example
    -------
    >>> outputs = torch.tensor([ [1., -1.], [-1., 1.], [0.9, 0.1], [0.1, 0.9] ])
    >>> outputs = outputs.unsqueeze(1)
    >>> targets = torch.tensor([ [0], [1], [0], [1] ])
    >>> log_prob = LogSoftmaxWrapper(nn.Identity())
    >>> loss = log_prob(outputs, targets)
    >>> 0 <= loss < 1
    tensor(True)
    >>> log_prob = LogSoftmaxWrapper(AngularMargin(margin=0.2, scale=32))
    >>> loss = log_prob(outputs, targets)
    >>> 0 <= loss < 1
    tensor(True)
    >>> outputs = torch.tensor([ [1., -1.], [-1., 1.], [0.9, 0.1], [0.1, 0.9] ])
    >>> log_prob = LogSoftmaxWrapper(AdditiveAngularMargin(margin=0.3, scale=32))
    >>> loss = log_prob(outputs, targets)
    >>> 0 <= loss < 1
    tensor(True)
    c                    s$   t    || _tjjdd| _d S )Nr^   rk   )r%   r&   r   rI   rc   	KLDivLoss	criterion)r(   r   r)   r   r    r&   C  s   
zLogSoftmaxWrapper.__init__Nc                 C   s   | d}| d}t| |jd  }z| ||}W n ty-   | |}Y nw tj|dd}| 	|||
  }|S )a  
        Arguments
        ---------
        outputs : torch.Tensor
            Network output tensor, of shape
            [batch, 1, outdim].
        targets : torch.Tensor
            Target tensor, of shape [batch, 1].
        length : torch.Tensor
            The lengths of the corresponding inputs.

        Returns
        -------
        loss: torch.Tensor
            Loss for current examples.
        r	   r<   )r   Fone_hotr|   r   rx   r   	TypeErrorr   r   r^   )r(   r   r   rs   rr   r.   r   r   r    rW   H  s   

zLogSoftmaxWrapper.forwardr$   r   r   r   r)   r    r   (  s    r   c                 C   sP  t j|dd\}}g }g }t|jd D ]3}	||	 }
||	 | jd    }|
d| }
tt|
 	 |d}
t
|
}||
 || qt|}t|jd D ]}	|||	  }t|D ]	}||	 d q_qSt t|}|| | }t t| }|| || jd    }| dd} t jjj| ||||ddS )a  Knowledge distillation for CTC loss.

    Reference
    ---------
    Distilling Knowledge from Ensembles of Acoustic Models for Joint CTC-Attention End-to-End Speech Recognition.
    https://arxiv.org/abs/2005.09310

    Arguments
    ---------
    log_probs : torch.Tensor
        Predicted tensor from student model, of shape [batch, time, chars].
    targets : torch.Tensor
        Predicted tensor from single teacher model, of shape [batch, time, chars].
    input_lens : torch.Tensor
        Length of each utterance.
    blank_index : int
        The location of the blank symbol among the character indexes.
    device : str
        Device for computing.

    Returns
    -------
    The computed CTC loss.
    r   r<   r   r	   )blank_idT)ra   )rI   r   r,   r   r   r   r   listcpunumpyrA   rQ   
from_numpynparrayr   rb   rc   rd   re   )r   r   r   r   rH   scoresrr   	pred_listpred_len_listjcurrent_predactual_sizecurrent_pred_lenmax_pred_lendiffnfake_labfake_lab_lengthsr   r   r    ctc_loss_kdf  sD   


r  c                 C   s   | |   dS )ai  Simple version of distillation for cross-entropy loss.

    Arguments
    ---------
    inp : torch.Tensor
        The probabilities from student model, of shape [batch_size * length, feature]
    target : torch.Tensor
        The probabilities from teacher model, of shape [batch_size * length, feature]

    Returns
    -------
    The distilled outputs.
    r	   )r^   )inprC   r   r   r    ce_kd  s   r  c           
      C   s   | j d }| j d }t||j d   }| || | j d }t||tj|jd}||| |j d }t||}	t	|	||| t	| }	|	S )a  Knowledge distillation for negative log-likelihood loss.

    Reference
    ---------
    Distilling Knowledge from Ensembles of Acoustic Models for Joint CTC-Attention End-to-End Speech Recognition.
    https://arxiv.org/abs/2005.09310

    Arguments
    ---------
    probabilities : torch.Tensor
        The predicted probabilities from the student model.
        Format is [batch, frames, p]
    targets : torch.Tensor
        The target probabilities from the teacher model.
        Format is [batch, frames, p]
    rel_lab_lengths : torch.Tensor
        Length of each utterance, if the frame-level loss is desired.

    Returns
    -------
    Computed NLL KD loss.

    Example
    -------
    >>> probabilities = torch.tensor([[[0.8, 0.2], [0.2, 0.8]]])
    >>> targets = torch.tensor([[[0.9, 0.1], [0.1, 0.9]]])
    >>> rel_lab_lengths = torch.tensor([1.])
    >>> nll_loss_kd(probabilities, targets, rel_lab_lengths)
    tensor(-0.7400)
    r   r	   r   )r   r   rH   )
r   rI   r   r   r   r   rx   rH   r  r^   )
rz   r   rel_lab_lengthsN_sntr   lab_lengths	prob_currr   lab_currr.   r   r   r    nll_loss_kd  s   
 

 r  c                       s(   e Zd ZdZ fddZdd Z  ZS )ContrastiveLossa&  Contrastive loss as used in wav2vec2.

    Reference
    ---------
    wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations
    https://arxiv.org/abs/2006.11477

    Arguments
    ---------
    logit_temp : torch.Float
        A temperature to divide the logits.
    c                    r#   r$   )r%   r&   
logit_temp)r(   r  r)   r   r    r&     r+   zContrastiveLoss.__init__c           
      C   s   ||k d}|d}tj||gdd}tj| | dd|}| r2td|dd |< |dd	d|
d}tj|
dtj|jd}tj|| j |d	d
}t|ddk| |
d  }	||	fS )a  Compute contrastive loss.

        Arguments
        ---------
        x : torch.Tensor
            Encoded embeddings with shape (B, T, C).
        y : torch.Tensor
            Feature extractor target embeddings with shape (B, T, C).
        negs : torch.Tensor
            Negative embeddings from feature extractor with shape (N, B, T, C)
            where N is number of negatives. Can be obtained with our sample_negatives
            function (check in lobes/wav2vec2).

        Returns
        -------
        loss : torch.Tensor
            The computed loss
        accuracy : torch.Tensor
            The computed accuracy
        r   r   r<   z-infr	   Nr;   )r   rH   r^   rk   )allr?   rI   catcosine_similarityrx   type_asanyrb   r   r>   zerosr|   rH   r   cross_entropyr  r^   rw   numel)
r(   r8   ynegs
neg_is_postarget_and_negativesr   r   r.   accuracyr   r   r    rW     s*   
zContrastiveLoss.forwardr   r   r   r)   r    r    s    r  c                       sF   e Zd ZdZd fdd	Zddd	Zdd
dZdd Zdd Z  Z	S )VariationalAutoencoderLossa"  The Variational Autoencoder loss, with support for length masking

    From Autoencoding Variational Bayes: https://arxiv.org/pdf/1312.6114.pdf

    Arguments
    ---------
    rec_loss: callable
        a function or module to compute the reconstruction loss
    len_dim: int
        the dimension to be used for the length, if encoding sequences
        of variable length
    dist_loss_weight: float
        the relative weight of the distribution loss (K-L divergence)

    Example
    -------
    >>> from speechbrain.nnet.autoencoders import VariationalAutoencoderOutput
    >>> vae_loss = VariationalAutoencoderLoss(dist_loss_weight=0.5)
    >>> predictions = VariationalAutoencoderOutput(
    ...     rec=torch.tensor(
    ...         [[0.8, 1.0],
    ...          [1.2, 0.6],
    ...          [0.4, 1.4]]
    ...         ),
    ...     mean=torch.tensor(
    ...         [[0.5, 1.0],
    ...          [1.5, 1.0],
    ...          [1.0, 1.4]],
    ...         ),
    ...     log_var=torch.tensor(
    ...         [[0.0, -0.2],
    ...          [2.0, -2.0],
    ...          [0.2,  0.4]],
    ...         ),
    ...     latent=torch.randn(3, 1),
    ...     latent_sample=torch.randn(3, 1),
    ...     latent_length=torch.tensor([1., 1., 1.]),
    ... )
    >>> targets = torch.tensor(
    ...     [[0.9, 1.1],
    ...      [1.4, 0.6],
    ...      [0.2, 1.4]]
    ... )
    >>> loss = vae_loss(predictions, targets)
    >>> loss
    tensor(1.1264)
    >>> details = vae_loss.details(predictions, targets)
    >>> details  #doctest: +NORMALIZE_WHITESPACE
    VariationalAutoencoderLossDetails(loss=tensor(1.1264),
                                      rec_loss=tensor(0.0333),
                                      dist_loss=tensor(2.1861),
                                      weighted_dist_loss=tensor(1.0930))
    Nr	   MbP?c                    s,   t    |d u rt}|| _|| _|| _d S r$   )r%   r&   rv   rec_lossdist_loss_weightr   )r(   r&  r   r'  r)   r   r    r&   i  s   

z#VariationalAutoencoderLoss.__init__r]   c                 C   s   |  ||||jS )a?  Computes the forward pass

        Arguments
        ---------
        predictions: speechbrain.nnet.autoencoders.VariationalAutoencoderOutput
            the variational autoencoder output
        targets: torch.Tensor
            the reconstruction targets
        length : torch.Tensor
            Length of each sample for computing true error with a mask.
        reduction: str
            The type of reduction to apply, default "batchmean"

        Returns
        -------
        loss: torch.Tensor
            the VAE loss (reconstruction + K-L divergence)
        )detailsr.   )r(   rr   r   rs   r   r   r   r    rW   q  s   z"VariationalAutoencoderLoss.forwardc           	      C   s`   |du rt |d}| ||\}}t|||}t|||}| j| }|| }t||||S )a/  Gets detailed information about the loss (useful for plotting, logs,
        etc.)

        Arguments
        ---------
        predictions: speechbrain.nnet.autoencoders.VariationalAutoencoderOutput
            the variational autoencoder output (or a tuple of rec, mean, log_var)
        targets: torch.Tensor
            targets for the reconstruction loss
        length : torch.Tensor
            Length of each sample for computing true error with a mask.
        reduction: str
            The type of reduction to apply, default "batchmean"

        Returns
        -------
        details: VAELossDetails
            a namedtuple with the following parameters
            loss: torch.Tensor
                the combined loss
            rec_loss: torch.Tensor
                the reconstruction loss
            dist_loss: torch.Tensor
                the distribution loss (K-L divergence), raw value
            weighted_dist_loss: torch.Tensor
                the weighted value of the distribution loss, as used
                in the combined loss

        Nr   )rI   onesr>   _compute_components_reduce_autoencoder_lossr'  !VariationalAutoencoderLossDetails)	r(   rr   r   rs   r   r&  	dist_lossweighted_dist_lossr.   r   r   r    r(    s   
z"VariationalAutoencoderLoss.detailsc           	      C   sP   |\}}}}}}|  | j||d d}|  dd| |d  |   }||fS )Nrk   g      r	   r;   )_align_length_axisr&  r   )	r(   rr   r   rec_r   log_varr&  r-  r   r   r    r*    s   z.VariationalAutoencoderLoss._compute_componentsc                 C      | | jdS Nr	   moveaxisr   r(   rL   r   r   r    r/       z-VariationalAutoencoderLoss._align_length_axis)Nr	   r%  Nr]   )
rX   rY   rZ   r[   r&   rW   r(  r*  r/  r\   r   r   r)   r    r$  2  s    6

*
r$  c                       s>   e Zd ZdZd fdd	ZdddZdd	d
Zdd Z  ZS )AutoencoderLossa  An implementation of a standard (non-variational)
    autoencoder loss

    Arguments
    ---------
    rec_loss: callable
        the callable to compute the reconstruction loss
    len_dim: int
        the dimension index to be used for length

    Example
    -------
    >>> from speechbrain.nnet.autoencoders import AutoencoderOutput
    >>> ae_loss = AutoencoderLoss()
    >>> rec = torch.tensor(
    ...   [[0.8, 1.0],
    ...    [1.2, 0.6],
    ...    [0.4, 1.4]]
    ... )
    >>> predictions = AutoencoderOutput(
    ...     rec=rec,
    ...     latent=torch.randn(3, 1),
    ...     latent_length=torch.tensor([1., 1.])
    ... )
    >>> targets = torch.tensor(
    ...     [[0.9, 1.1],
    ...      [1.4, 0.6],
    ...      [0.2, 1.4]]
    ... )
    >>> ae_loss(predictions, targets)
    tensor(0.0333)
    >>> ae_loss.details(predictions, targets)
    AutoencoderLossDetails(loss=tensor(0.0333), rec_loss=tensor(0.0333))
    Nr	   c                    s&   t    |d u rt}|| _|| _d S r$   )r%   r&   rv   r&  r   )r(   r&  r   r)   r   r    r&     s
   

zAutoencoderLoss.__init__r]   c                 C   s$   |  | j||jdd}t|||S )a  Computes the autoencoder loss

        Arguments
        ---------
        predictions: speechbrain.nnet.autoencoders.AutoencoderOutput
            the autoencoder output
        targets: torch.Tensor
            targets for the reconstruction loss
        length: torch.Tensor
            Length of each sample for computing true error with a mask
        reduction: str
            The type of reduction to apply, default "batchmean"

        Returns
        -------
        The computed loss.
        Nrk   )r/  r&  r0  r+  )r(   rr   r   rs   r   r&  r   r   r    rW     s   zAutoencoderLoss.forwardc                 C   s   | ||||}t ||S )a  Gets detailed information about the loss (useful for plotting, logs,
        etc.)

        This is provided mainly to make the loss interchangeable with
        more complex autoencoder loses, such as the VAE loss.

        Arguments
        ---------
        predictions: speechbrain.nnet.autoencoders.AutoencoderOutput
            the  autoencoder output
        targets: torch.Tensor
            targets for the reconstruction loss
        length : torch.Tensor
            Length of each sample for computing true error with a mask.
        reduction: str
            The type of reduction to apply, default "batchmean"

        Returns
        -------
        details: AutoencoderLossDetails
            a namedtuple with the following parameters
            loss: torch.Tensor
                the combined loss
            rec_loss: torch.Tensor
                the reconstruction loss
        )AutoencoderLossDetails)r(   rr   r   rs   r   r.   r   r   r    r(     s   
zAutoencoderLoss.detailsc                 C   r3  r4  r5  r7  r   r   r    r/    r8  z"AutoencoderLoss._align_length_axisr4  r9  )	rX   rY   rZ   r[   r&   rW   r(  r/  r\   r   r   r)   r    r:    s    #

r:  c                 C   sR   |  d}|d urt|| |}t|| | }nt| }t| | ||d}|S )Nr	   rk   )r>   r   r   	expand_asrI   r   r   )r.   rs   r   r   r   reduced_lossr   r   r    r+  "  s   

r+  r,  )r.   r&  r-  r.  r;  r.   r&  c                       s6   e Zd ZdZejf fdd	Zdd Zdd Z  Z	S )	LaplacianaA  Computes the Laplacian for image-like data

    Arguments
    ---------
    kernel_size: int
        the size of the Laplacian kernel
    dtype: torch.dtype
        the data type (optional)

    Example
    -------
    >>> lap = Laplacian(3)
    >>> lap.get_kernel()
    tensor([[[[-1., -1., -1.],
              [-1.,  8., -1.],
              [-1., -1., -1.]]]])
    >>> data = torch.eye(6) + torch.eye(6).flip(0)
    >>> data
    tensor([[1., 0., 0., 0., 0., 1.],
            [0., 1., 0., 0., 1., 0.],
            [0., 0., 1., 1., 0., 0.],
            [0., 0., 1., 1., 0., 0.],
            [0., 1., 0., 0., 1., 0.],
            [1., 0., 0., 0., 0., 1.]])
    >>> lap(data.unsqueeze(0))
    tensor([[[ 6., -3., -3.,  6.],
             [-3.,  4.,  4., -3.],
             [-3.,  4.,  4., -3.],
             [ 6., -3., -3.,  6.]]])
    c                    s.   t    || _|| _|  }| d| d S )Nkernel)r%   r&   kernel_sizer   
get_kernelregister_buffer)r(   r@  r   r?  r)   r   r    r&   W  s
   
zLaplacian.__init__c                 C   sP   t j| j| j| jd }| jd }| jd d }||||f< |dd}|S )zComputes the Laplacian kernel)r   r;   r   r   )rI   r)  r@  r   r?   )r(   r?  mid_position	mid_valuer   r   r    rA  ^  s   
zLaplacian.get_kernelc                 C   s   t || jS )a  Computes the Laplacian of image-like data

        Arguments
        ---------
        data: torch.Tensor
            a (B x C x W x H) or (B x C x H x W) tensor with image-like data

        Returns
        -------
        The transformed outputs.
        )r   conv2dr?  )r(   r   r   r   r    rW   i  s   zLaplacian.forward)
rX   rY   rZ   r[   rI   float32r&   rA  rW   r\   r   r   r)   r    r>  7  s
    r>  c                       s,   e Zd ZdZd	 fdd	Zd
ddZ  ZS )LaplacianVarianceLossa~  The Laplacian variance loss - used to penalize blurriness in image-like
    data, such as spectrograms.

    The loss value will be the negative variance because the
    higher the variance, the sharper the image.

    Arguments
    ---------
    kernel_size: int
        the Laplacian kernel size

    len_dim: int
        the dimension to be used as the length

    Example
    -------
    >>> lap_loss = LaplacianVarianceLoss(3)
    >>> data = torch.ones(6, 6).unsqueeze(0)
    >>> data
    tensor([[[1., 1., 1., 1., 1., 1.],
             [1., 1., 1., 1., 1., 1.],
             [1., 1., 1., 1., 1., 1.],
             [1., 1., 1., 1., 1., 1.],
             [1., 1., 1., 1., 1., 1.],
             [1., 1., 1., 1., 1., 1.]]])
    >>> lap_loss(data)
    tensor(-0.)
    >>> data = (
    ...     torch.eye(6) + torch.eye(6).flip(0)
    ... ).unsqueeze(0)
    >>> data
    tensor([[[1., 0., 0., 0., 0., 1.],
             [0., 1., 0., 0., 1., 0.],
             [0., 0., 1., 1., 0., 0.],
             [0., 0., 1., 1., 0., 0.],
             [0., 1., 0., 0., 1., 0.],
             [1., 0., 0., 0., 0., 1.]]])
    >>> lap_loss(data)
    tensor(-17.6000)
    ri   r	   c                    s    t    || _t|d| _d S )N)r@  )r%   r&   r   r>  	laplacian)r(   r@  r   r)   r   r    r&     s   
zLaplacianVarianceLoss.__init__Nc                 C   sb   |  |}|| jd}t|| }|dkr'tdd t||D }| S ||	 }| S )a~  Computes the Laplacian loss

        Arguments
        ---------
        predictions: torch.Tensor
            a (B x C x W x H) or (B x C x H x W) tensor
        length: torch.Tensor
            The length of the corresponding inputs.
        reduction: str
            "batch" or None

        Returns
        -------
        loss: torch.Tensor
            the loss value
        r	   r_   c                 S   s   g | ]\}}| | qS r   )masked_selectvar)r7   item	item_maskr   r   r    r9     s    z1LaplacianVarianceLoss.forward.<locals>.<listcomp>)
rH  r6  r   r   boolrI   rR   rP   rI  rJ  )r(   rr   rs   r   rH  r   r.   r   r   r    rW     s   
zLaplacianVarianceLoss.forward)ri   r	   )NNr   r   r   r)   r    rG  x  s    )rG  )r   T)r   )Nri   r   )Nr~   ri   Nr   )NNNr   ri   r~   )Nr~   ri   r   r   )Nr   r   r   )ri   )Nr~   r   r   r4  )r   r~   NN)<r[   rm   r   collectionsr   	itertoolsr   r   r   rI   torch.nnrc   torch.nn.functionalrd   r   speechbrain.dataio.dataior   speechbrain.decoders.ctcr   speechbrain.utils.data_utilsr   speechbrain.utils.loggerr   rX   loggerr!   Moduler"   re   ro   rv   r}   r   r   r   r   r   rl   rp   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r$  r:  r+  r,  r;  r>  rG  r   r   r   r    <module>   s    	
A 
9
&
&
1
A
W
S
D
$

B_
8!H5+.=>C9? dA