o
    i'                     @   s   d Z ddlmZmZmZmZ ddlZddlm  m	Z
 ddlmZ G dd dejjZG dd dejjZG d	d
 d
ejjZG dd dejjZdS )zkHiFiGAN-related loss modules.

This code is modified from https://github.com/kan-bayashi/ParallelWaveGAN.

    )ListOptionalTupleUnionN)LogMelFbankc                       sp   e Zd ZdZ		ddedef fddZdeeee	j
  ee	j
 e	j
f d	e	j
fd
dZdd Zdd Z  ZS )GeneratorAdversarialLossz"Generator adversarial loss module.Tmseaverage_by_discriminators	loss_typec                    sF   t    || _|dv sJ | d|dkr| j| _dS | j| _dS )zInitialize GeneratorAversarialLoss module.

        Args:
            average_by_discriminators (bool): Whether to average the loss by
                the number of discriminators.
            loss_type (str): Loss type, "mse" or "hinge".

        r   hinge is not supported.r   N)super__init__r	   	_mse_loss	criterion_hinge_lossselfr	   r
   	__class__ P/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/gan_tts/hifigan/loss.pyr      s   
z!GeneratorAdversarialLoss.__init__outputsreturnc                 C   sl   t |ttfr/d}t|D ]\}}t |ttfr|d }|| |7 }q| jr-||d  }|S | |}|S )aO  Calcualate generator adversarial loss.

        Args:
            outputs (Union[List[List[Tensor]], List[Tensor], Tensor]): Discriminator
                outputs, list of discriminator outputs, or list of list of discriminator
                outputs..

        Returns:
            Tensor: Generator adversarial loss value.

                   )
isinstancetuplelist	enumerater   r	   )r   r   adv_lossioutputs_r   r   r   forward*   s   
z GeneratorAdversarialLoss.forwardc                 C      t ||| S NFmse_lossnew_onessizer   xr   r   r   r   G      z"GeneratorAdversarialLoss._mse_lossc                 C   s
   |   S r'   )meanr-   r   r   r   r   J   s   
z$GeneratorAdversarialLoss._hinge_lossTr   )__name__
__module____qualname____doc__boolstrr   r   r   torchTensorr%   r   r   __classcell__r   r   r   r   r      s     
r   c                       s   e Zd ZdZ		ddedef fddZdeeee	j
  ee	j
 e	j
f d	eeee	j
  ee	j
 e	j
f d
ee	j
e	j
f fddZde	j
d
e	j
fddZde	j
d
e	j
fddZde	j
d
e	j
fddZde	j
d
e	j
fddZ  ZS )DiscriminatorAdversarialLossz&Discriminator adversarial loss module.Tr   r	   r
   c                    sV   t    || _|dv sJ | d|dkr!| j| _| j| _dS | j| _| j| _dS )zInitialize DiscriminatorAversarialLoss module.

        Args:
            average_by_discriminators (bool): Whether to average the loss by
                the number of discriminators.
            loss_type (str): Loss type, "mse" or "hinge".

        r   r   r   N)	r   r   r	   _mse_fake_lossfake_criterion_mse_real_lossreal_criterion_hinge_fake_loss_hinge_real_lossr   r   r   r   r   Q   s   
z%DiscriminatorAdversarialLoss.__init__outputs_hatr   r   c                 C   s   t |ttfrId}d}tt||D ]#\}\}}t |ttfr'|d }|d }|| |7 }|| |7 }q| jrE||d  }||d  }||fS | |}| |}||fS )a  Calcualate discriminator adversarial loss.

        Args:
            outputs_hat (Union[List[List[Tensor]], List[Tensor], Tensor]): Discriminator
                outputs, list of discriminator outputs, or list of list of discriminator
                outputs calculated from generator.
            outputs (Union[List[List[Tensor]], List[Tensor], Tensor]): Discriminator
                outputs, list of discriminator outputs, or list of list of discriminator
                outputs calculated from groundtruth.

        Returns:
            Tensor: Discriminator real loss value.
            Tensor: Discriminator fake loss value.

        r   r   r   )r   r   r    r!   zipr?   r=   r	   )r   rB   r   	real_loss	fake_lossr#   outputs_hat_r$   r   r   r   r%   h   s    

z$DiscriminatorAdversarialLoss.forwardr.   c                 C   r&   r'   r(   r-   r   r   r   r>      r/   z+DiscriminatorAdversarialLoss._mse_real_lossc                 C   r&   r'   )r)   r*   	new_zerosr,   r-   r   r   r   r<      r/   z+DiscriminatorAdversarialLoss._mse_fake_lossc              	   C   s"   t t |d ||  S Nr   r8   r0   minrG   r,   r-   r   r   r   rA      s   "z-DiscriminatorAdversarialLoss._hinge_real_lossc              	   C   s$   t t | d ||  S rH   rI   r-   r   r   r   r@      s   $z-DiscriminatorAdversarialLoss._hinge_fake_lossr1   )r2   r3   r4   r5   r6   r7   r   r   r   r8   r9   r   r%   r>   r<   rA   r@   r:   r   r   r   r   r;   N   s(    
'r;   c                       s~   e Zd ZdZ			ddededef fddZd	eeeej	  eej	 f d
eeeej	  eej	 f dej	fddZ
  ZS )FeatureMatchLosszFeature matching loss module.TFaverage_by_layersr	   include_final_outputsc                    s    t    || _|| _|| _dS )a  Initialize FeatureMatchLoss module.

        Args:
            average_by_layers (bool): Whether to average the loss by the number
                of layers.
            average_by_discriminators (bool): Whether to average the loss by
                the number of discriminators.
            include_final_outputs (bool): Whether to include the final output of
                each discriminator for loss calculation.

        N)r   r   rL   r	   rM   )r   rL   r	   rM   r   r   r   r      s   

zFeatureMatchLoss.__init__	feats_hatfeatsr   c                 C   s   d}t t||D ]<\}\}}d}| js |dd }|dd }t t||D ]\}\}	}
|t|	|
 7 }q'| jrA||d  }||7 }q	| jrO||d  }|S )a	  Calculate feature matching loss.

        Args:
            feats_hat (Union[List[List[Tensor]], List[Tensor]]): List of list of
                discriminator outputs or list of discriminator outputs calcuated
                from generator's outputs.
            feats (Union[List[List[Tensor]], List[Tensor]]): List of list of
                discriminator outputs or list of discriminator outputs calcuated
                from groundtruth..

        Returns:
            Tensor: Feature matching loss value.

        r   Nr   r   )r!   rC   rM   r)   l1_lossdetachrL   r	   )r   rN   rO   feat_match_lossr#   
feats_hat_feats_feat_match_loss_j	feat_hat_feat_r   r   r   r%      s   
zFeatureMatchLoss.forward)TTF)r2   r3   r4   r5   r6   r   r   r   r8   r9   r%   r:   r   r   r   r   rK      s&    rK   c                       s   e Zd ZdZ											
			d dedededee dededee dee dedededee f fddZ		d!de
jde
jdee
j de
jfddZ  ZS )"MelSpectrogramLosszMel-spectrogram loss."V        NhannP   r   TF      $@fsn_fft
hop_length
win_lengthwindown_melsfminfmaxcenter
normalizedonesidedlog_basec                    s0   t    t|||||||||	|
||d| _dS )a  Initialize Mel-spectrogram loss.

        Args:
            fs (int): Sampling rate.
            n_fft (int): FFT points.
            hop_length (int): Hop length.
            win_length (Optional[int]): Window length.
            window (str): Window type.
            n_mels (int): Number of Mel basis.
            fmin (Optional[int]): Minimum frequency for Mel.
            fmax (Optional[int]): Maximum frequency for Mel.
            center (bool): Whether to use center window.
            normalized (bool): Whether to use normalized one.
            onesided (bool): Whether to use oneseded one.
            log_base (Optional[float]): Log base value.

        )r`   ra   rb   rc   rd   re   rf   rg   rh   ri   rj   rk   N)r   r   r   
wav_to_mel)r   r`   ra   rb   rc   rd   re   rf   rg   rh   ri   rj   rk   r   r   r   r      s   
 zMelSpectrogramLoss.__init__y_hatyspecr   c                 C   sR   |  |d\}}|du r|  |d\}}n| j |\}}t||}|S )a  Calculate Mel-spectrogram loss.

        Args:
            y_hat (Tensor): Generated waveform tensor (B, 1, T).
            y (Tensor): Groundtruth waveform tensor (B, 1, T).
            spec (Optional[Tensor]): Groundtruth linear amplitude spectrum tensor
                (B, n_fft, T). if provided, use it instead of groundtruth waveform.

        Returns:
            Tensor: Mel-spectrogram loss value.

        r   N)rl   squeezelogmelr)   rP   )r   rm   rn   ro   mel_hat_melmel_lossr   r   r   r%     s   zMelSpectrogramLoss.forward)rZ   r[   r\   Nr]   r^   r   NTFTr_   r'   )r2   r3   r4   r5   intr   r7   r6   floatr   r8   r9   r%   r:   r   r   r   r   rY      sb    	
4rY   )r5   typingr   r   r   r   r8   torch.nn.functionalnn
functionalr)   'espnet2.tts.feats_extract.log_mel_fbankr   Moduler   r;   rK   rY   r   r   r   r   <module>   s   <N=