o
    }oi                     @   s   d dl Z d dlm  mZ d dlmZ d dlmZ	 d dl
mZ G dd de jjZG dd de jjZG d	d
 d
e jjZG dd de jjZdS )    N)	rearrange)grad)mask_sequence_tensorc                       0   e Zd ZdZddef fddZdd Z  ZS )	GradientPenaltyLossa2  
    R1 loss from [1], used following [2]
    [1] Mescheder et. al. - Which Training Methods for GANs do actually Converge? 2018, https://arxiv.org/abs/1801.04406
    [2] Karras et. al. - A Style-Based Generator Architecture for Generative Adversarial Networks, 2018 (https://arxiv.org/abs/1812.04948)
          $@weightc                       t    || _d S Nsuper__init__r   selfr   	__class__ k/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/tts/losses/spectrogram_enhancer_losses.pyr   6      

zGradientPenaltyLoss.__init__c                 C   s^   |j ^}}t||tj| |jdddddd }||d}| j|jdddd d 	  S )	N)deviceT)outputsinputsgrad_outputscreate_graphretain_graphonly_inputsr         dim)
shape
torch_gradtorchonessizer   reshaper   normmean)r   imagesoutput
batch_size_	gradientsr   r   r   __call__:   s   
	 zGradientPenaltyLoss.__call__)r   __name__
__module____qualname____doc__floatr   r.   __classcell__r   r   r   r   r   /   s    r   c                   @      e Zd Zdd ZdS )GeneratorLossc                 C   s   |  S r
   )r(   )r   fake_logitsr   r   r   r.   J   s   zGeneratorLoss.__call__Nr0   r1   r2   r.   r   r   r   r   r7   I       r7   c                   @   r6   )	HingeLossc                 C   s    t d| t d|   S )Nr   )Frelur(   )r   real_logitsr8   r   r   r   r.   O   s    zHingeLoss.__call__Nr9   r   r   r   r   r;   N   r:   r;   c                       r   )	ConsistencyLossz
    Loss to keep SpectrogramEnhancer from generating extra sounds.
    L1 distance on x0.25 Mel scale (20 bins for typical 80-bin scale)
    
   r   c                    r	   r
   r   r   r   r   r   r   Y   r   zConsistencyLoss.__init__c                 C   s~   |j ^ }}}|d |}}tj|||fddd}tj|||fddd}||  }t||}|t|d jdd | j S )N   bilinearT)r%   mode	antialiaszb -> b 1 1 1r   r   )	r!   r<   interpolateabsr   r   sumr(   r   )r   	conditionr*   lengthsr,   whdistr   r   r   r.   ]   s   
 zConsistencyLoss.__call__)r@   r/   r   r   r   r   r?   S   s    r?   )r#   torch.nn.functionalnn
functionalr<   einopsr   torch.autogradr   r"   #nemo.collections.common.parts.utilsr   Moduler   r7   r;   r?   r   r   r   r   <module>   s   &