o
    wi&                     @   s   d dl Z d dlm  mZ d dlmZmZ d dlm	Z	m
Z
mZmZ d dlmZ dd ZG dd deZG d	d
 d
eZG dd deZG dd deZdS )    N)Loss	typecheck)AudioSignalLengthsTypeLossTypeSpectrogramType)
NeuralTypec              
   C   sT   t t j| ||||dd}|d }|d }t t j|d |d  ddddS )	a^  Perform STFT and convert to magnitude spectrogram.
    Args:
        x (Tensor): Input signal tensor (B, T).
        fft_size (int): FFT size.
        hop_size (int): Hop size.
        win_length (int): Window length.
        window (str): Window function type.
    Returns:
        Tensor: Magnitude spectrogram (B, #frames, fft_size // 2 + 1).
    T)return_complex).r   ).      gHz>)minr
   )torchview_as_realstftsqrtclamp	transpose)xfft_sizehop_size
win_lengthwindowx_stftrealimag r   c/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/nemo/collections/tts/losses/stftlosses.pyr   5   s   (r   c                   @   s6   e Zd ZdZedd Zedd Ze dd ZdS )	SpectralConvergenceLossz!Spectral convergence loss module.c                 C   s   t dt t dt dS )NBTDx_magy_mag)r   r   selfr   r   r   input_typesK      

z#SpectralConvergenceLoss.input_typesc                 C      dt t diS Nlosselements_typer   r   r%   r   r   r   output_typesR      z$SpectralConvergenceLoss.output_typesc                C   s2   t j|| dddt j|ddd }t |}|S )aD  Calculate forward propagation. It is assumed that x_mag and y_mag were padded to fit the maximum batch
        sequence length with silence, hence it is assumed that the norm of these extra padded values are 0. Therefore,
        input_lengths is not a argument unlike in LogSTFTMagnitudeLoss.
        Args:
            x_mag (Tensor): Magnitude spectrogram of predicted signal (B, #frames, #freq_bins).
            y_mag (Tensor): Magnitude spectrogram of groundtruth signal (B, #frames, #freq_bins).
        Returns:
            Tensor: Spectral convergence loss value.
        fro)r
   r   )pdim)r   normmean)r&   r#   r$   r+   r   r   r   forwardX   s   $
zSpectralConvergenceLoss.forwardN	__name__
__module____qualname____doc__propertyr'   r/   r   r6   r   r   r   r   r   H   s    

r   c                   @   s<   e Zd ZdZedd Zedd Ze dddd	ZdS )
LogSTFTMagnitudeLosszLog STFT magnitude loss module.c                 C   (   t dt t dt t dt dddS )Nr   r   Toptionalr#   r$   input_lengths)r   r   r   r%   r   r   r   r'   m      

z LogSTFTMagnitudeLoss.input_typesc                 C   r)   r*   r.   r%   r   r   r   r/   u   r0   z!LogSTFTMagnitudeLoss.output_typesNrB   c                C   sx   |du rt t|t|S t jt|t|dd}||jd  }tj|ddgd}|| }t||jd  S )a  Calculate forward propagation.
        Args:
            x_mag (Tensor): Magnitude spectrogram of predicted signal (B, #frames, #freq_bins).
            y_mag (Tensor): Magnitude spectrogram of groundtruth signal (B, #frames, #freq_bins).
            input_lengths (Tensor): Length of groundtruth sample in samples (B).
        Returns:
            Tensor: Log STFT magnitude loss value.
        Nnone)	reductionr   r
   )r3   r   )Fl1_lossr   logshapesum)r&   r#   r$   rB   r+   r   r   r   r6   {   s   
zLogSTFTMagnitudeLoss.forwardr7   r   r   r   r   r=   j   s    

r=   c                       sN   e Zd ZdZd fdd	Zedd	 Zed
d Ze ddddZ	  Z
S )STFTLosszSTFT loss module.   x   X  hann_windowc                    sD   t t|   || _|| _|| _tt||| _t	 | _
t | _dS )zInitialize STFT loss module.N)superrL   __init__r   
shift_sizer   getattrr   r   r   spectral_convergence_lossr=   log_stft_magnitude_loss)r&   r   rS   r   r   	__class__r   r   rR      s   zSTFTLoss.__init__c                 C   r>   N)r   r    r   Tr?   r   yrB   r   r   r   r%   r   r   r   r'      rC   zSTFTLoss.input_typesc                 C   s   t t dt t ddS Nr,   )sc_lossmag_lossr.   r%   r   r   r   r/      r(   zSTFTLoss.output_typesNrD   c                C   s   | j j|jkr| j |j| _ t|| j| j| j| j }t|| j| j| j| j }| j||d}|durUt	|t
| j d }t||jd ksUJ t| d|jd  | j|||d}||fS )ac  Calculate forward propagation.
        Args:
            x (Tensor): Predicted signal (B, T).
            y (Tensor): Groundtruth signal (B, T).
            input_lengths (Tensor): Length of groundtruth sample in samples (B).
        Returns:
            Tensor: Spectral convergence loss value.
            Tensor: Log STFT magnitude loss value.
        r"   Nr
   z != rA   )r   devicetor   r   rS   r   rU   r   floorfloatmaxrJ   rV   )r&   r   r[   rB   r#   r$   r^   r_   r   r   r   r6      s   .zSTFTLoss.forward)rM   rN   rO   rP   r8   r9   r:   r;   rR   r<   r'   r/   r   r6   __classcell__r   r   rW   r   rL      s    


rL   c                       sN   e Zd ZdZd fdd	Zedd Zedd Ze d	d
ddZ	  Z
S )MultiResolutionSTFTLossz"Multi resolution STFT loss module.rP   c                    sx   t t|   t|t|  krt|ksJ  J tj | _t|||D ]\}}}|  jt	||||g7  _q'dS )a  Initialize Multi resolution STFT loss module.
        Args:
            fft_sizes (list): List of FFT sizes.
            hop_sizes (list): List of hop sizes.
            win_lengths (list): List of window lengths.
            window (str): Window function type.
        N)
rQ   rg   rR   lenr   nn
ModuleListstft_lossesziprL   )r&   	fft_sizes	hop_sizeswin_lengthsr   fssswlrW   r   r   rR      s   (z MultiResolutionSTFTLoss.__init__c                 C   r>   rY   r\   r%   r   r   r   r'      rC   z#MultiResolutionSTFTLoss.input_typesc                 C   s   t t dgt t dgdS r]   r.   r%   r   r   r   r/      s   z$MultiResolutionSTFTLoss.output_typesNrD   c          
      C   s^   dgt | j }dgt | j }t| jD ]\}}||||d\}}	|||< |	||< q||fS )a  Calculate forward propagation.
        Args:
            x (Tensor): Predicted signal (B, T).
            y (Tensor): Groundtruth signal (B, T).
            input_lengths (Tensor): Length of groundtruth sample in samples (B).
        Returns:
            List[Tensor]: Multi resolution spectral convergence loss value.
            List[Tensor]: Multi resolution log STFT magnitude loss value.
        g        rZ   )rh   rk   	enumerate)
r&   r   r[   rB   r^   r_   ifsc_lmag_lr   r   r   r6      s   
zMultiResolutionSTFTLoss.forward)rP   re   r   r   rW   r   rg      s    

rg   )r   torch.nn.functionalri   
functionalrG   nemo.core.classesr   r   nemo.core.neural_types.elementsr   r   r   r   "nemo.core.neural_types.neural_typer   r   r   r=   rL   rg   r   r   r   r   <module>   s   ,"'4