o
    Si	                     @   s   d Z ddlZddlZddlZddlmZ ddlmZ ddlm	Z	 ej
ej ZejZejej
e ej
e f Zddejeef d	ejeef fd
dZG dd dejZG dd dejZdd Zedkrie  dS dS )z3MS-STFT discriminator, provided here for reference.    N)nn)	rearrange   )
NormConv2dr   r   kernel_sizedilationc                 C   s0   | d d |d  d | d d |d  d fS )Nr   r       )r   r   r
   r
   C/home/ubuntu/.local/lib/python3.10/site-packages/encodec/msstftd.pyget_2d_padding   s   0r   c                       s   e Zd ZdZddddddddg dddd	d
ddifdededededededededejeef dejdejeef dede	de	de
f fddZdejfdd Z  ZS )!DiscriminatorSTFTa3  STFT sub-discriminator.
    Args:
        filters (int): Number of filters in convolutions
        in_channels (int): Number of input channels. Default: 1
        out_channels (int): Number of output channels. Default: 1
        n_fft (int): Size of FFT for each scale. Default: 1024
        hop_length (int): Length of hop between STFT windows for each scale. Default: 256
        kernel_size (tuple of int): Inner Conv2d kernel sizes. Default: ``(3, 9)``
        stride (tuple of int): Inner Conv2d strides. Default: ``(1, 2)``
        dilations (list of int): Inner Conv2d dilation on the time dimension. Default: ``[1, 2, 4]``
        win_length (int): Window size for each scale. Default: 1024
        normalized (bool): Whether to normalize by magnitude after stft. Default: True
        norm (str): Normalization method. Default: `'weight_norm'`
        activation (str): Activation function. Default: `'LeakyReLU'`
        activation_params (dict): Parameters to provide to the activation function.
        growth (int): Growth factor for the filters. Default: 1
    r         )   	   )r   r	      )r   r	   Tweight_norm	LeakyReLUnegative_slopeg?filtersin_channelsout_channelsn_fft
hop_length
win_lengthmax_filtersfilters_scaler   	dilationsstride
normalizednorm
activationactivation_paramsc                    s  t    t|	dksJ t|dksJ || _|| _|| _|| _|| _|| _|| _	t
tj|d	i || _tjj| j| j| jtj| j	dd d d| _d| j }t | _| jt|| j|	t|	d t|| j |}t|
D ](\}}t||d  | j |}| jt|||	||dft|	|df|d |}qot|t|
d  | j |}| jt|||	d |	d ft|	d |	d f|d t|| j|	d |	d ft|	d |	d f|d| _d S )
Nr	   F)r   r   r   	window_fnr    centerpad_modepower)r   paddingr   )r   r   r   r(   r!   r   )r   r(   r!   r
   )super__init__lenr   r   r   r   r   r   r    getattrtorchr   r"   
torchaudio
transformsSpectrogramhann_windowspec_transform
ModuleListconvsappendr   r   min	enumerate	conv_post)selfr   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   spec_channelsin_chsir   out_chs	__class__r
   r   r*   .   sL   



zDiscriminatorSTFT.__init__xc                 C   sp   g }|  |}tj|j|jgdd}t|d}t| jD ]\}}||}| |}|	| q| 
|}||fS )Nr   )dimzb c w t -> b c t w)r2   r-   catrealimagr   r7   r4   r"   r5   r8   )r9   r@   fmapzr<   layerr
   r
   r   forwardV   s   



zDiscriminatorSTFT.forward)__name__
__module____qualname____doc__inttpTupleListboolstrdictr*   r-   TensorrH   __classcell__r
   r
   r>   r   r      sB    
(r   c                       sx   e Zd ZdZddg dg dg dfdedededeje d	eje d
eje f fddZdej	de
fddZ  ZS )MultiScaleSTFTDiscriminatora  Multi-Scale STFT (MS-STFT) discriminator.
    Args:
        filters (int): Number of filters in convolutions
        in_channels (int): Number of input channels. Default: 1
        out_channels (int): Number of output channels. Default: 1
        n_ffts (Sequence[int]): Size of FFT for each scale
        hop_lengths (Sequence[int]): Length of hop between STFT windows for each scale
        win_lengths (Sequence[int]): Window size for each scale
        **kwargs: additional args for STFTDiscriminator
    r   )r   i      )r   rW      r   r   r   n_fftshop_lengthswin_lengthsc              	      sp   t    tt  krtksJ  J t fddttD | _t| j| _d S )Nc              
      s4   g | ]}t  f| | | d qS ))r   r   r   r   r   )r   ).0r<   r   rZ   r   kwargsrY   r   r[   r
   r   
<listcomp>s   s    
z8MultiScaleSTFTDiscriminator.__init__.<locals>.<listcomp>)r)   r*   r+   r   r3   rangediscriminatorsnum_discriminators)r9   r   r   r   rY   rZ   r[   r^   r>   r]   r   r*   n   s   
(

z$MultiScaleSTFTDiscriminator.__init__r@   returnc                 C   s<   g }g }| j D ]}||\}}|| || q||fS )N)ra   r5   )r9   r@   logitsfmapsdisclogitrE   r
   r
   r   rH   z   s   

z#MultiScaleSTFTDiscriminator.forward)rI   rJ   rK   rL   rM   rN   rP   r*   r-   rT   DiscriminatorOutputrH   rU   r
   r
   r>   r   rV   c   s    
rV   c                  C   s   t dd} tddd}tddd}| |\}}| |\}}t|t|  kr=t|  kr=t|  kr=| jks@J  J tdd || D sMJ tdd || D sZJ tdd || D sgJ d S )	N    )r   r   i]  c                 S   s   g | ]}t |d kqS )   )r+   )r\   fmr
   r
   r   r_      s    ztest.<locals>.<listcomp>c                 S   s0   g | ]}|D ]}t |jd d ddgkqqS )Nr	   r   ri   )listshape)r\   rk   fr
   r
   r   r_      s   0 c                 S   s   g | ]	}t |jd kqS )r   )r+   rm   )r\   rd   r
   r
   r   r_      s    )rV   r-   randnr+   rb   all)rf   yy_haty_disc_rfmap_r
y_disc_genfmap_genr
   r
   r   test   s   
Brw   __main__)r   )rL   typingrN   r.   r-   r   einopsr   modulesr   rP   rT   FeatureMapType
LogitsTyperO   rh   rM   r   Moduler   rV   rw   rI   r
   r
   r
   r   <module>   s"   (G!
