o
    ·Ð¯i;–  ã                   @   sR  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlm  m	Z
 d dlmZ G dd„ dejƒZG dd„ deƒZG dd„ deƒZd	d
„ ZG dd„ deƒZG dd„ dejƒZG dd„ dejƒZG dd„ dejƒZG dd„ dejƒZdd„ Zedkr'ejddZejdedddgd e ¡ ZejZdZ ej! "d ¡ dZ#e#d Z$d Z%d!Z&d Z'd"Z(d#Z)d$Z*d%Z+d&Z,e#d' Z-d(Z.d)Z/d*Z0ej! 1d+de$¡Z2e 3e2¡ 4e¡Z5ee%e&e'e(e)e*d#d,Z6ee#e%e+e,e-e.e/e0d#d-	Z7e6 4e¡ e7 4e¡ e6 8e5ddd…f ¡Z9e7 8e9¡Z:	 ed.ed/ ed0ed/ ed1ed/ ed2ed/ z	ed3ed/ W dS    e;d4ƒ‚dS )5é    N)Ú	Parameterc                       s,   e Zd Z‡ fdd„Zdd„ Zdd„ Z‡  ZS )ÚDFTBasec                    s   t t| ƒ ¡  dS )z,Base class for DFT and IDFT matrix.
        N)Úsuperr   Ú__init__)Úself©Ú	__class__© úE/home/ubuntu/.local/lib/python3.10/site-packages/torchlibrosa/stft.pyr      s   zDFTBase.__init__c                 C   óH   t  t  |¡t  |¡¡\}}t  dt j d | ¡}t  ||| ¡}|S )Néþÿÿÿù              ð?©ÚnpÚmeshgridÚarangeÚexpÚpiÚpower©r   ÚnÚxÚyÚomegaÚWr	   r	   r
   Ú
dft_matrix   ó   zDFTBase.dft_matrixc                 C   r   )Né   r   r   r   r	   r	   r
   Úidft_matrix   r   zDFTBase.idft_matrix)Ú__name__Ú
__module__Ú__qualname__r   r   r   Ú__classcell__r	   r	   r   r
   r      s    r   c                       s<   e Zd Z‡ fdd„Zdd„ Zdd„ Zdd„ Zd	d
„ Z‡  ZS )ÚDFTc                    s†   t t| ƒ ¡  |  |¡| _|  |¡| _t t	 
| j¡¡| _t t	 | j¡¡| _t t	 
| j¡¡| _t t	 | j¡¡| _|| _|| _dS )zÍCalculate discrete Fourier transform (DFT), inverse DFT (IDFT, 
        right DFT (RDFT) RDFT, and inverse RDFT (IRDFT.) 

        Args:
          n: fft window size
          norm: None | 'ortho'
        N)r   r#   r   r   r   r   Úinv_WÚtorchÚTensorr   ÚrealÚW_realÚimagÚW_imagÚ
inv_W_realÚ
inv_W_imagr   Únorm)r   r   r-   r   r	   r
   r   !   s   
zDFT.__init__c                 C   s~   t  || j¡t  || j¡ }t  || j¡t  || j¡ }| jdu r&	 ||fS | jdkr;|t | j¡ }|t | j¡ }||fS )a   Calculate DFT of a signal.

        Args:
            x_real: (n,), real part of a signal
            x_imag: (n,), imag part of a signal

        Returns:
            z_real: (n,), real part of output
            z_imag: (n,), imag part of output
        NÚortho)r%   Úmatmulr(   r*   r-   ÚmathÚsqrtr   ©r   Úx_realÚx_imagÚz_realÚz_imagr	   r	   r
   Údft6   s   

üzDFT.dftc                 C   s‚   t  || j¡t  || j¡ }t  || j¡t  || j¡ }| jdu r*|| j }||fS | jdkr=|t t¡ }|t t¡ }||fS )a   Calculate IDFT of a signal.

        Args:
            x_real: (n,), real part of a signal
            x_imag: (n,), imag part of a signal
        Returns:
            z_real: (n,), real part of output
            z_imag: (n,), imag part of output
        Nr.   )r%   r/   r+   r,   r-   r   r0   r1   r2   r	   r	   r
   ÚidftM   s   



üzDFT.idftc                 C   sˆ   | j d d }t || jdd|…f ¡}t || jdd|…f ¡}| jdu r+	 ||fS | jdkr@|t | j ¡ }|t | j ¡ }||fS )a  Calculate right RDFT of signal.

        Args:
            x_real: (n,), real part of a signal
            x_imag: (n,), imag part of a signal

        Returns:
            z_real: (n // 2 + 1,), real part of output
            z_imag: (n // 2 + 1,), imag part of output
        r   é   .r   Nr.   )r   r%   r/   r(   r*   r-   r0   r1   )r   r3   Ún_rfftr5   r6   r	   r	   r
   Úrdftc   s   

üzDFT.rdftc                 C   sÂ   | j d d }tj|dd}tj|dd}tj||dd|d …f fdd}tj|d|dd|d …f  fdd}t || j¡t || j¡ }| jd	u rS|| j  }|S | jd
kr_|t 	t ¡ }|S )a  Calculate IRDFT of signal.
        
        Args:
            x_real: (n // 2 + 1,), real part of a signal
            x_imag: (n // 2 + 1,), imag part of a signal

        Returns:
            z_real: (n,), real part of output
            z_imag: (n,), imag part of output
        r   r9   )éÿÿÿÿ©Údims.r<   ©Údimg      ð¿Nr.   )
r   r%   ÚflipÚcatr/   r+   r,   r-   r0   r1   )r   r3   r4   r:   Úflip_x_realÚflip_x_imagr5   r	   r	   r
   Úirdft{   s   "&


ýz	DFT.irdft)	r   r    r!   r   r7   r8   r;   rE   r"   r	   r	   r   r
   r#       s    r#   c                       s*   e Zd Z		d
‡ fdd„	Zdd	„ Z‡  ZS )ÚSTFTé   NÚhannTÚreflectc              
      s˜  t t| ƒ ¡  |dv sJ ‚|| _|| _|| _|| _|| _|| _| jdu r'|| _| jdu r4t	| jd ƒ| _t
jj|| jdd}t
jj||d}|  |¡| _|d d }	tjd|	|| jd	ddd
d| _tjd|	|| jd	ddd
d| _t t | jdd…d	|	…f |dd…df  ¡j¡dd…ddd…f | jj_t t | jdd…d	|	…f |dd…df  ¡j¡dd…ddd…f | jj_|rÈ|  ¡ D ]}
d
|
_qÂdS dS )a'  PyTorch implementation of STFT with Conv1d. The function has the 
        same output as librosa.stft.

        Args:
            n_fft: int, fft window size, e.g., 2048
            hop_length: int, hop length samples, e.g., 441
            win_length: int, window length e.g., 2048
            window: str, window function name, e.g., 'hann'
            center: bool
            pad_mode: str, e.g., 'reflect'
            freeze_parameters: bool, set to True to freeze all parameters. Set
                to False to finetune all parameters.
        ©ÚconstantrI   Né   T©Úfftbins©Úsizer   r9   r   F©Úin_channelsÚout_channelsÚkernel_sizeÚstrideÚpaddingÚdilationÚgroupsÚbias)r   rF   r   Ún_fftÚ
hop_lengthÚ
win_lengthÚwindowÚcenterÚpad_modeÚintÚlibrosaÚfiltersÚ
get_windowÚutilÚ
pad_centerr   r   ÚnnÚConv1dÚ	conv_realÚ	conv_imagr%   r&   r   r'   ÚTÚweightÚdatar)   Ú
parametersÚrequires_grad)r   rZ   r[   r\   r]   r^   r_   Úfreeze_parametersÚ
fft_windowrS   Úparamr   r	   r
   r   œ   sN   


þ
þ,ÿ
ÿ,ÿ
ÿþzSTFT.__init__c                 C   s¢   |dd…ddd…f }| j rtj|| jd | jd f| jd}|  |¡}|  |¡}|dd…ddd…dd…f  dd¡}|dd…ddd…dd…f  dd¡}||fS )a  Calculate STFT of batch of signals.

        Args: 
            input: (batch_size, data_length), input signals.

        Returns:
            real: (batch_size, 1, time_steps, n_fft // 2 + 1)
            imag: (batch_size, 1, time_steps, n_fft // 2 + 1)
        Nr   ©ÚpadÚmodeé   )r^   ÚFrs   rZ   r_   rh   ri   Ú	transpose)r   Úinputr   r'   r)   r	   r	   r
   ÚforwardÝ   s   "

$$zSTFT.forward)rG   NNrH   TrI   T©r   r    r!   r   ry   r"   r	   r	   r   r
   rF   ›   s
    ÿArF   c                 C   sF   | d |d  d }| t  |dtj¡ }|t  |dtj¡ }|||fS )a?  Calculate magnitude and phase from real and imag part of signals.

    Args:
        real: tensor, real part of signals
        imag: tensor, imag part of signals

    Returns:
        mag: tensor, magnitude of signals
        cos: tensor, cosine of phases of signals
        sin: tensor, sine of phases of signals
    r   g      à?ç»½×Ùß|Û=)r%   Úclampr   Úinf)r'   r)   ÚmagÚcosÚsinr	   r	   r
   Úmagphaseø   s   
r   c                       s|   e Zd Z			d‡ fdd„	Zd	d
„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Z	dd„ Z
dd„ Zdd„ Zdd„ Zdd„ Z‡  ZS ) ÚISTFTrG   NrH   TrI   Fc                    sÖ   t t| ƒ ¡  |dv sJ ‚|s|	du sJ dƒ‚|
du sJ dƒ‚|| _|| _|| _|| _|| _|| _|| _	| jdu r=| j| _| jdu rJt
| jd ƒ| _|  ¡  |  ¡  | j	r[|  |	|
¡ |rg|  ¡ D ]}d|_qadS dS )a™  PyTorch implementation of ISTFT with Conv1d. The function has the 
        same output as librosa.istft.

        Args:
            n_fft: int, fft window size, e.g., 2048
            hop_length: int, hop length samples, e.g., 441
            win_length: int, window length e.g., 2048
            window: str, window function name, e.g., 'hann'
            center: bool
            pad_mode: str, e.g., 'reflect'
            freeze_parameters: bool, set to True to freeze all parameters. Set
                to False to finetune all parameters.
            onnx: bool, set to True when exporting trained model to ONNX. This
                will replace several operations to operators supported by ONNX.
            frames_num: None | int, number of frames of audio clips to be 
                inferneced. Only useable when onnx=True.
            device: None | str, device of ONNX. Only useable when onnx=True.
        rJ   Nz)When onnx=False, frames_num must be None!z%When onnx=False, device must be None!rL   F)r   r‚   r   rZ   r[   r\   r]   r^   r_   Úonnxr`   Úinit_real_imag_convÚinit_overlap_add_windowÚinit_onnx_modulesrm   rn   )r   rZ   r[   r\   r]   r^   r_   ro   rƒ   Ú
frames_numÚdevicerq   r   r	   r
   r     s2   

þzISTFT.__init__c              
   C   sø   |   | j¡| j | _tj| j| jddddddd| _tj| j| jddddddd| _tjj	| j
| jdd}tjj|| jd}t t | j|ddd…f  ¡j¡dd…dd…df | jj_t t | j|ddd…f  ¡j¡dd…dd…df | jj_dS )	zEInitialize Conv1d for calculating real and imag part of DFT.
        r9   r   FrQ   TrM   rO   N)r   rZ   r   rf   rg   rh   ri   ra   rb   rc   r]   r\   rd   re   r%   r&   r   r'   rj   rk   rl   r)   )r   Úifft_windowr	   r	   r
   r„   G  s*   þþÿ
ÿÿÿzISTFT.init_real_imag_convc                 C   sV   t jj| j| jdd}t jj|ddd }t jj|| jd}t	 
|¡}|  d|¡ dS )zKInitialize overlap add window for reconstruct time domain signals.
        TrM   N©r-   r   rO   Ú
ola_window)ra   rb   rc   r]   r\   rd   Ú	normalizere   rZ   r%   r&   Úregister_buffer)r   r‹   r	   r	   r
   r…   b  s
   
zISTFT.init_overlap_add_windowc                 C   s  t j| jd d | jd d ddd| _t | jd d | jd d df¡}t t | jd d ¡ddd… ¡|dd…dd…df< t 	|¡| jj
_t j| jd| jdf| jdfdd| _t 	t | j¡dd…ddd…df ¡| jj
_|r||  ||¡| _dS g | _dS )	zkInitialize ONNX modules.

        Args:
            frames_num: int
            device: str | None
        r   r9   F)rR   rS   rT   rY   Nr<   r   )rR   rS   rT   rU   rY   )rf   rg   rZ   Úreverser   ÚzerosÚarrayÚeyer%   r&   rk   rl   ÚConvTranspose2dr[   Úoverlap_addÚ_get_ifft_window_sum_onnxÚifft_window_sum)r   r‡   rˆ   Útmpr	   r	   r
   r†   p  s   
ÿ$6ÿ,
zISTFT.init_onnx_modulesc                 C   sÞ   |  ¡ dkr|  ¡ dksJ ‚|j\}}}}|dd…ddd…dd…f  dd¡}|dd…ddd…dd…f  dd¡}| jrE|  ||¡\}}n|  ||¡\}}|  |¡|  |¡ }	| jra|  |	|¡}
n|  	|	|¡}
|  
|
|¡}
|
S )a6  Calculate inverse STFT.

        Args:
            real_stft: (batch_size, channels=1, time_steps, n_fft // 2 + 1)
            imag_stft: (batch_size, channels=1, time_steps, n_fft // 2 + 1)
            length: int
        
        Returns:
            real: (batch_size, data_length), output signals.
        rL   Nr   r9   r   )Ú
ndimensionÚshaperw   rƒ   Ú_get_full_stft_onnxÚ_get_full_stftrh   ri   Ú#_overlap_add_divide_window_sum_onnxÚ_overlap_add_divide_window_sumÚ_trim_edges)r   Ú	real_stftÚ	imag_stftÚlengthÚ
batch_sizeÚ_r‡   Úfull_real_stftÚfull_imag_stftÚs_realr   r	   r	   r
   ry   ‘  s   $$zISTFT.forwardc                 C   sr   t j|t j|dd…dd…dd…f dgdfdd}t j|t j|dd…dd…dd…f dgd fdd}||fS )af  Get full stft representation from spectrum using symmetry attribute.

        Args:
            real_stft: (batch_size, n_fft // 2 + 1, time_steps)
            imag_stft: (batch_size, n_fft // 2 + 1, time_steps)

        Returns:
            full_real_stft: (batch_size, n_fft, time_steps)
            full_imag_stft: (batch_size, n_fft, time_steps)
        Nr9   r<   r=   r?   )r%   rB   rA   ©r   rž   rŸ   r£   r¤   r	   r	   r
   rš   »  s   46zISTFT._get_full_stftc                 C   s:   t j||  |¡fdd}t j||  |¡ fdd}||fS )aÜ  Get full stft representation from spectrum using symmetry attribute
        for ONNX. Replace several pytorch operations in self._get_full_stft() 
        that are not supported by ONNX.

        Args:
            real_stft: (batch_size, n_fft // 2 + 1, time_steps)
            imag_stft: (batch_size, n_fft // 2 + 1, time_steps)

        Returns:
            full_real_stft: (batch_size, n_fft, time_steps)
            full_imag_stft: (batch_size, n_fft, time_steps)
        r9   r?   )r%   rB   rŽ   r¦   r	   r	   r
   r™   Ë  s   zISTFT._get_full_stft_onnxc                 C   sŠ   |j d d | j | j }tjjj|d|fd| jfd| jfd}|dd…dddd…f }|  |¡}t |dt	j
¡}||ddd…f  }|S )zíOverlap add signals in frames to reconstruct signals.

        Args:
            s_real: (batch_size, n_fft, time_steps), signals in frames
            frames_num: int

        Returns:
            y: (batch_size, audio_samples)
        r<   r9   ©rx   Úoutput_sizerT   rU   Nr   ç•dyáý¥=)r˜   r[   r\   r%   rf   Ú
functionalÚfoldÚ_get_ifft_windowr|   r   r}   )r   r¥   r‡   Úoutput_samplesr   r•   r	   r	   r
   rœ   ß  s   ÿ
	z$ISTFT._overlap_add_divide_window_sumc                 C   s`   |d | j  | j }| jddd…df  dd|¡}tj|d|fd| jfd| j fd}| ¡ }|S )zÑGet overlap-add window sum to be divided.

        Args:
            frames_num: int

        Returns:
            ifft_window_sum: (audio_samlpes,), overlap-add window sum to be 
            divided.
        r9   Nr§   )r[   r\   r‹   Úrepeatrv   r«   Úsqueeze)r   r‡   r­   Úwindow_matrixr•   r	   r	   r
   r¬   	  s   þzISTFT._get_ifft_windowc                 C   sx   |d }|   |¡dd…ddd…df }t| jƒ|jd kr'|j}|  ||¡| _t | jdtj	¡}||ddd…f  }|S )at  Overlap add signals in frames to reconstruct signals for ONNX. 
        Replace several pytorch operations in 
        self._overlap_add_divide_window_sum() that are not supported by ONNX.

        Args:
            s_real: (batch_size, n_fft, time_steps), signals in frames
            frames_num: int

        Returns:
            y: (batch_size, audio_samples)
        ).NNr   r9   r©   )
r“   Úlenr•   r˜   rˆ   r”   r%   r|   r   r}   )r   r¥   r‡   r   rˆ   r•   r	   r	   r
   r›   $  s   z)ISTFT._overlap_add_divide_window_sum_onnxc                 C   s:   t jj| j|| j| j| jd}t |¡}|r| 	|¡}|S )zëPre-calculate overlap-add window sum for reconstructing signals when
        using ONNX.

        Args:
            frames_num: int
            device: str | None

        Returns:
            ifft_window_sum: (audio_samples,)
        )r]   Ún_framesr\   rZ   r[   )
ra   rb   Úwindow_sumsquarer]   r\   rZ   r[   r%   r&   Úto)r   r‡   rˆ   r•   r	   r	   r
   r”   I  s   

þ

zISTFT._get_ifft_window_sum_onnxc                 C   sf   |du r| j r|dd…| jd | j d …f }|S | j r#| jd }nd}|dd…||| …f }|S )z’Trim audio.

        Args:
            y: (audio_samples,)
            length: int

        Returns:
            (trimmed_audio_samples,)
        Nr   r   )r^   rZ   )r   r   r    Ústartr	   r	   r
   r   a  s   "	ùzISTFT._trim_edges)
rG   NNrH   TrI   TFNN)r   r    r!   r   r„   r…   r†   ry   rš   r™   rœ   r¬   r›   r”   r   r"   r	   r	   r   r
   r‚     s     þ;!**%r‚   c                       s,   e Zd Z			d‡ fdd„	Zd	d
„ Z‡  ZS )ÚSpectrogramrG   NrH   TrI   ç       @c	           	   	      s0   t t| ƒ ¡  || _t||||||dd| _dS )zŒCalculate spectrogram using pytorch. The STFT is implemented with 
        Conv1d. The function has the same output of librosa.stft
        T©rZ   r[   r\   r]   r^   r_   ro   N)r   r¶   r   r   rF   Ústft)	r   rZ   r[   r\   r]   r^   r_   r   ro   r   r	   r
   r   {  s   þzSpectrogram.__init__c                 C   sB   | j  |¡\}}|d |d  }| jdkr	 |S || jd  }|S )zÃCalculate spectrogram of input signals.
        Args: 
            input: (batch_size, data_length)

        Returns:
            spectrogram: (batch_size, 1, time_steps, n_fft // 2 + 1)
        r   r·   )r¹   ry   r   )r   rx   r'   r)   Úspectrogramr	   r	   r
   ry   ‰  s   	
þzSpectrogram.forward)rG   NNrH   TrI   r·   Trz   r	   r	   r   r
   r¶   z  s    þr¶   c                       s2   e Zd Z		d‡ fd
d„	Zdd„ Zdd„ Z‡  ZS )ÚLogmelFilterBanké"V  rG   é@   ç        NTç      ð?r{   ç      T@c                    s„   t t| ƒ ¡  || _|| _|| _|	| _|dkr|d }tjj	|||||dj
| _t t | j¡¡| _|
r>|  ¡ D ]}d|_q8dS dS )zŠCalculate logmel spectrogram using pytorch. The mel filter bank is 
        the pytorch implementation of as librosa.filters.mel 
        Nr   ©ÚsrrZ   Ún_melsÚfminÚfmaxF)r   r»   r   Úis_logÚrefÚaminÚtop_dbra   rb   Úmelrj   ÚmelWrf   r   r%   r&   rm   rn   )r   rÂ   rZ   rÃ   rÄ   rÅ   rÆ   rÇ   rÈ   rÉ   ro   rq   r   r	   r
   r      s$   ÿÿþzLogmelFilterBank.__init__c                 C   s*   t  || j¡}| jr|  |¡}|S |}|S )zÈCalculate (log) mel spectrogram from spectrogram.

        Args:
            input: (*, n_fft), spectrogram
        
        Returns: 
            output: (*, mel_bins), (log) mel spectrogram
        )r%   r/   rË   rÆ   Úpower_to_db)r   rx   Úmel_spectrogramÚoutputr	   r	   r
   ry   ¸  s   
þzLogmelFilterBank.forwardc                 C   s†   | j }dt tj|| jtjd¡ }|dt t | j|¡¡ 8 }| jdurA| jdk r1t	j
j d¡‚tj|| ¡  ¡ | j tjd}|S )zaPower to db, this function is the pytorch implementation of 
        librosa.power_to_lb
        ç      $@©ÚminÚmaxNr   útop_db must be non-negative)rÇ   r%   Úlog10r|   rÈ   r   r}   ÚmaximumrÉ   ra   rd   Ú
exceptionsÚParameterErrorrÒ   Úitem©r   rx   Ú	ref_valueÚlog_specr	   r	   r
   rÌ   Ï  s   

 zLogmelFilterBank.power_to_db)
r¼   rG   r½   r¾   NTr¿   r{   rÀ   T©r   r    r!   r   ry   rÌ   r"   r	   r	   r   r
   r»   Ÿ  s    ÿr»   c                       s.   e Zd Zd	‡ fdd„	Zdd„ Zdd„ Z‡  ZS )
ÚEnframerG   é   c                    s\   t t| ƒ ¡  tjd|||ddd| _t t |¡dd…ddd…f ¡| jj	_
d| jj	_dS )zlEnframe a time sequence. This function is the pytorch implementation 
        of librosa.util.frame
        r9   r   F)rR   rS   rT   rU   rV   rY   N)r   rÝ   r   rf   rg   Úenframe_convr%   r&   r‘   rk   rl   rn   )r   Úframe_lengthr[   r   r	   r
   r   à  s   þ(zEnframe.__init__c                 C   s    |   |dd…ddd…f ¡}|S )z³Enframe signals into frames.
        Args:
            input: (batch_size, samples)
        
        Returns: 
            output: (batch_size, window_length, frames_num)
        N)rß   )r   rx   rÎ   r	   r	   r
   ry   í  s   zEnframe.forwardc                 C   s‚   | j }dt tj|| jtjd¡ }|dt t | j|¡¡ 8 }| jdur?| jdk r1t	j
j d¡‚tj|| ¡ | j tjd}|S )zbPower to db, this function is the pytorch implementation of 
        librosa.power_to_lb.
        rÏ   rÐ   Nr   rÓ   )rÇ   r%   rÔ   r|   rÈ   r   r}   rÕ   rÉ   ra   rd   rÖ   r×   rÒ   rÙ   r	   r	   r
   rÌ   ù  s   

zEnframe.power_to_db)rG   rÞ   rÜ   r	   r	   r   r
   rÝ   ß  s    rÝ   c                       s$   e Zd Z‡ fdd„Zdd„ Z‡  ZS )ÚScalarc                    sV   t t| ƒ ¡  tt |d ¡ƒ| _tt |d ¡ƒ| _|r'|  ¡ D ]}d|_	q!d S d S )NÚmeanÚstdF)
r   rá   r   r   r%   r&   Úscalar_meanÚ
scalar_stdrm   rn   )r   Úscalarro   rq   r   r	   r
   r   
  s   þzScalar.__init__c                 C   s   || j  | j S )N)rä   rå   )r   rx   r	   r	   r
   ry     s   zScalar.forwardrz   r	   r	   r   r
   rá   	  s    
rá   c           =      C   sJ  | dkröd}d}t j d¡ t j dd|¡}t |¡}t jj||d}t jj||d}t jj||d}t jj||d}	t	||ƒ}
|
 
|t |¡¡}|
 |d |d ¡}|
 |¡}|
 |d |d ¡}tdƒ tt  t  t  |¡|d  ¡  ¡  ¡¡ƒ tt  t  t  |¡|d  ¡  ¡  ¡¡ƒ tt  t  t  |¡|d  ¡  ¡  ¡¡ƒ tt  t  t  |¡|d  ¡  ¡  ¡¡ƒ tt  t  t  |¡|d  ¡  ¡  ¡¡ƒ tt  t  t  |¡|d  ¡  ¡  ¡¡ƒ tt  t  || ¡  ¡  ¡¡ƒ dS | d	kröt |¡}t j d¡ d
}|d }d}d}d}d}d}d}t j dd|¡}t |¡ |¡}tj|||||dj}t||||||dd}| |¡ | |ddd…f ¡\}}tdƒ tt  t  t  |¡|j ¡  ¡ d  ¡¡ƒ tt  t  t  |¡|j ¡  ¡ d  ¡¡ƒ tj|j||||d}t||||||dd}| |¡ | |||¡ddd…f }t ||ƒ\}}} | || ||  |¡ddd…f }!tt  t  ||j ¡  ¡  ¡¡ƒ tt  t  ||j ¡  ¡  ¡¡ƒ tt  t  ||!j ¡  ¡  ¡¡ƒ dS | dkrSt j!}"t |¡}t j d¡ d
}|d }d}d}d}d}d}d}d}#d}$|d }%d}&d}'d}(t j dd|¡}t |¡ |¡}tdƒ tj|||||||"|d}t j"|t#|d ƒ|d})tj$j%|||#|$|%d j}*t  &t  |j¡d |*¡}+tj'|+|&|'|(d!},t||||||dd}-t(|||#|$|%|&|'|(dd"	}.|- |¡ |. |¡ t)j"|dddd…f |d |d f|d#d }/tt  t  |)|/ ¡  ¡  ¡¡ƒ |- *|/dddd…f ¡d }0|- +|/dddd…f ¡d }1tt  t  t  |¡|0j ¡  ¡  ¡¡ƒ tt  t  t  |¡|1j ¡  ¡  ¡¡ƒ t,||||||dd}2|2 |¡ |2 |ddd…f ¡}3t -|3|.j.¡}4tt  t  |+|4j ¡  ¡ d  ¡¡ƒ |. |3¡}5tt  t  |,|5d j ¡  ¡  ¡¡ƒ dS | d$kr²t |¡}t j d¡ d
}|d }d}d}t j dd|¡}t |¡ |¡}td%ƒ tj/j0|||d&}6t1||d&}7|7 |¡ |7|ddd…f ƒ}8tt  t  |6|8j ¡  ¡  ¡¡ƒ dS | d'kr#t |¡}t j d¡ d
}|d }d}d}d}#t j dd|¡}t |¡ |¡}t2 3t,||d(t(||#d)d*¡}9|9 |¡ td+ƒ tj4j5|||||#d,j}:|9|ddd…f ƒ 6¡ };t  7|;j d-¡ ¡ |:¡}<td.|<› ƒ dS dS )/zCompare numpy + librosa and torchlibrosa results. For debug. 

    Args:
        select: 'dft' | 'logmel'
        device: 'cpu' | 'cuda'
    r7   é
   Nr   r<   r9   rŠ   z\Comparing librosa and pytorch implementation of DFT. All numbers below should be close to 0.r¹   r¼   rG   rÞ   rH   TrI   )r   rZ   r[   r]   r^   r¸   zqComparing librosa and pytorch implementation of STFT & ISTFT.             All numbers below should be close to 0.)r   r   )Ústft_matrixr[   r]   r^   r    Úlogmelé€   r¾   r·   r¿   r{   rÀ   zkComparing librosa and pytorch implementation of logmel spectrogram. All numbers below should be close to 0.)r   rZ   r[   r\   r]   r^   Údtyper_   r   )rt   rÁ   )rÇ   rÈ   rÉ   ©	rÂ   rZ   rÃ   rÄ   rÅ   rÇ   rÈ   rÉ   ro   rr   ÚenframezkComparing librosa and pytorch implementation of librosa.util.frame. All numbers below should be close to 0.)rà   r[   Údefault)r[   r\   F)rÂ   rÃ   rÆ   zMComparing default mel spectrogram from librosa to the pytorch implementation.)r[   rÂ   r\   rÃ   ÚcpuzPassed? )8r   ÚrandomÚseedÚuniformr%   r&   ÚfftÚifftÚrfftr#   r7   Ú
zeros_liker8   r;   rE   Úprintrâ   Úabsr'   rï   Únumpyr)   rˆ   r´   ra   r¹   rj   rF   ry   rl   Úistftr‚   r   Ú	complex64rs   r`   rb   rÊ   ÚdotrÌ   r»   rv   rh   ri   r¶   r/   rË   rd   ÚframerÝ   rf   Ú
SequentialÚfeatureÚmelspectrogramr¯   Úallclose)=Úselectrˆ   r   r-   Únp_dataÚpt_dataÚnp_fftÚnp_ifftÚnp_rfftÚnp_irfftÚobjÚpt_dftÚpt_idftÚpt_rdftÚpt_irdftÚsample_rateÚdata_lengthrZ   r[   r\   r]   r^   r_   Únp_stft_matrixÚpt_stft_extractorÚpt_stft_realÚpt_stft_imagÚ
np_istft_sÚpt_istft_extractorÚ
pt_istft_sÚpt_stft_magr   r€   Úpt_istft_s2rë   rÃ   rÄ   rÅ   rÇ   rÈ   rÉ   Únp_padÚnp_melWÚnp_mel_spectrogramÚnp_logmel_spectrogramÚstft_extractorÚlogmel_extractorÚpt_padÚpt_stft_matrix_realÚpt_stft_matrix_imagÚspectrogram_extractorÚpt_spectrogramÚpt_mel_spectrogramÚpt_logmel_spectrogramÚ	np_framesÚpt_frame_extractorÚ	pt_framesÚfeature_extractorÚnp_melspectÚpt_melspectÚpassedr	   r	   r
   Údebug  sf  


******$

ÿÿþ
,,ÿþ
"""&


þÿÿÿþ
þ

. ((þ
&
*


ÿ
&

þýü

ÿüüÖr-  Ú__main__Ú )Údescriptionz--devicerï   Úcuda)Útyperî   Úchoicesr¼   r9   rG   rÞ   rH   TrI   rê   r¾   r·   r¿   r{   rÀ   r<   r¸   rì   r7   )r  rˆ   r¹   ré   rí   rî   zqTorchlibrosa does support librosa>=0.6.0, for                 comparison with librosa, please use librosa>=0.7.0!)<r0   Úargparsera   rù   r   r%   Útorch.nnrf   Útorch.nn.functionalrª   rv   Útorch.nn.parameterr   ÚModuler   r#   rF   r   r‚   r¶   r»   rÝ   rá   r-  r   ÚArgumentParserÚparserÚadd_argumentÚstrÚ
parse_argsÚargsrˆ   r-   rð   rñ   r  r  rZ   r[   r\   r]   r^   r_   rÃ   rÄ   rÅ   rÇ   rÈ   rÉ   rò   r  r&   r´   r  r"  r  ry   r#  r%  Ú	Exceptionr	   r	   r	   r
   Ú<module>   s†    {]  q%@*  
þþ


Â