o
    i,/                     @   s   d Z ddlZddlZddlZddlmZmZmZmZ ddl	Z
ddlZddlm  mZ ddlmZ ddlmZ ddlmZ G dd dejjZG d	d
 d
ejjZdS )zbStyleMelGAN Modules.

This code is modified from https://github.com/kan-bayashi/ParallelWaveGAN.

    N)AnyDictListOptional)MelGANDiscriminator)PQMF)TADEResBlockc                       s   e Zd ZdZdddddddg d	d
ddig ddddfdedededededededee dedeee	f dee dededef fddZ
	 d.d!ejd"eej d#ejfd$d%Zd&d' Zd(d) Zd*d+ Zd!ejd#ejfd,d-Z  ZS )/StyleMelGANGeneratorzStyle MelGAN generator module.   P   @      	      T)   r   r   r   	LeakyReLUnegative_slope皙?)	r   r   r   r   r   r   r   r   r   nearestsoftmaxin_channelsaux_channelschannelsout_channelskernel_sizedilationbiasnoise_upsample_scalesnoise_upsample_activation noise_upsample_activation_paramsupsample_scalesupsample_modegated_functionuse_weight_normc                    s4  t    || _g }|}|D ],}|tjj|||d ||d |d  |d |dg7 }|ttj|	di |
g7 }|}qtjj| | _t	t
|| _tj | _|}|D ]}|  jt||||||||dg7  _|}qTt	t
|| | _tjtjj|||d||d d dtj | _|r|   |   dS )a  Initilize StyleMelGANGenerator module.

        Args:
            in_channels (int): Number of input noise channels.
            aux_channels (int): Number of auxiliary input channels.
            channels (int): Number of channels for conv layer.
            out_channels (int): Number of output channels.
            kernel_size (int): Kernel size of conv layers.
            dilation (int): Dilation factor for conv layers.
            bias (bool): Whether to add bias parameter in convolution layers.
            noise_upsample_scales (List[int]): List of noise upsampling scales.
            noise_upsample_activation (str): Activation function module name for noise
                upsampling.
            noise_upsample_activation_params (Dict[str, Any]): Hyperparameters for the
                above activation function.
            upsample_scales (List[int]): List of upsampling scales.
            upsample_mode (str): Upsampling mode in TADE layer.
            gated_function (str): Gated function used in TADEResBlock
                ("softmax" or "sigmoid").
            use_weight_norm (bool): Whether to use weight norm.
                If set to true, it will be applied to all of the conv layers.

        r   )stridepaddingoutput_paddingr   )r   r   r   r   r   upsample_factorr!   r"   r   )r   r%   N )super__init__r   torchnnConvTranspose1dgetattr
Sequentialnoise_upsampleintnpprodnoise_upsample_factor
ModuleListblocksr   r'   Conv1dTanhoutput_convapply_weight_normreset_parameters)selfr   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r0   in_chsnoise_upsample_scaleaux_chsupsample_scale	__class__r(   ]/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/gan_tts/style_melgan/style_melgan.pyr*      sn   
(
zStyleMelGANGenerator.__init__Nczreturnc                 C   s^   |du rt |d| jdj|j|jd}| |}| jD ]	}|||\}}q| 	|}|S )a  Calculate forward propagation.

        Args:
            c (Tensor): Auxiliary input tensor (B, channels, T).
            z (Tensor): Input noise tensor (B, in_channels, 1).

        Returns:
            Tensor: Output tensor (B, out_channels, T ** prod(upsample_scales)).

        Nr   r   )devicedtype)
r+   randnsizer   torG   rH   r0   r6   r9   )r<   rD   rE   xblockr(   r(   rC   forward   s   


zStyleMelGANGenerator.forwardc                 C       dt jjfdd}| | dS )z:Remove weight normalization module from all of the layers.mc                 S   s<   zt d|  d tjj|  W d S  ty   Y d S w )NzWeight norm is removed from .)loggingdebugr+   r,   utilsremove_weight_norm
ValueErrorrP   r(   r(   rC   _remove_weight_norm   s   zDStyleMelGANGenerator.remove_weight_norm.<locals>._remove_weight_normNr+   r,   Moduleapply)r<   rX   r(   r(   rC   rU         z'StyleMelGANGenerator.remove_weight_normc                 C   rO   )9Apply weight normalization module from all of the layers.rP   c                 S   D   t | tjjst | tjjr tjj|  td|  d d S d S NzWeight norm is applied to rQ   	
isinstancer+   r,   r7   r-   rT   weight_normrR   rS   rW   r(   r(   rC   _apply_weight_norm      zBStyleMelGANGenerator.apply_weight_norm.<locals>._apply_weight_normNrY   r<   rc   r(   r(   rC   r:      r\   z&StyleMelGANGenerator.apply_weight_normc                 C   rO   )Reset parameters.rP   c                 S   F   t | tjjst | tjjr!| jjdd t	d|  d d S d S Ng        g{Gz?zReset parameters in rQ   
ra   r+   r,   r7   r-   weightdatanormal_rR   rS   rW   r(   r(   rC   _reset_parameters      z@StyleMelGANGenerator.reset_parameters.<locals>._reset_parametersNrY   r<   rm   r(   r(   rC   r;      r\   z%StyleMelGANGenerator.reset_parametersc                 C   s   | ddd}d| jt|d| j f}tj|dtj	i
t|  j}| |}|d| j }t|d|d|d fd}| jD ]	}|||\}}qJ| |dd|f }|d ddS )zPerform inference.

        Args:
            c (Tensor): Input tensor (T, in_channels).

        Returns:
            Tensor: Output tensor (T ** prod(upsample_scales), out_channels).

        r   r   r   rH   	replicate.N)	transpose	unsqueezer   mathceilrJ   r4   r+   rI   floatrK   next
parametersrG   r0   r'   Fpadr6   r9   squeeze)r<   rD   
noise_sizenoiserL   total_lengthrM   r(   r(   rC   	inference   s   

"
zStyleMelGANGenerator.inference)N)__name__
__module____qualname____doc__r1   boolr   strr   r   r*   r+   Tensorr   rN   rU   r:   r;   r~   __classcell__r(   r(   rA   rC   r	      sr    	

j
r	   c                       s   e Zd ZdZdg dg dg dg dg dgdd	d
gdddg ddddidi d
dfdedee deee  deeef de	f
 fddZ
dejdeej fddZdejdeej fdd Zd!d" Zd#d$ Z  ZS )%StyleMelGANDiscriminatorz!Style MelGAN disciminator module.r   )   i   i   i   )r   NNN)r   >   gJ+?      "@)   r   gV-?r   )   r   g[tY?r   r            r   T)r   r   r   r   r   r   r   ReflectionPad1d)
r   kernel_sizesr   max_downsample_channelsr   downsample_scalesnonlinear_activationnonlinear_activation_paramsry   
pad_paramsrepeatswindow_sizespqmf_paramsdiscriminator_paramsr#   c                    s  t    t|t|ksJ dd t||D  t|t fdd D ks*J || _|| _tj	 | _
tj	 | _|D ]6}t|}|d |d< |d dkr]|  j
tj g7  _
n
|  j
t| g7  _
|  jtdi |g7  _q>|r{|   |   dS )	a  Initilize StyleMelGANDiscriminator module.

        Args:
            repeats (int): Number of repititons to apply RWD.
            window_sizes (List[int]): List of random window sizes.
            pqmf_params (List[List[int]]): List of list of Parameters for PQMF modules
            discriminator_params (Dict[str, Any]): Parameters for base discriminator
                module.
            use_weight_nom (bool): Whether to apply weight normalization.

        c                 S   s   g | ]
\}}||d   qS r   r(   ).0wspr(   r(   rC   
<listcomp>  s    z5StyleMelGANDiscriminator.__init__.<locals>.<listcomp>c                    s   g | ]} d  |kqS r   r(   )r   rJ   sizesr(   rC   r     s    r   r   r   Nr(   )r)   r*   lenzipsumr   r   r+   r,   r5   pqmfsdiscriminatorscopydeepcopyIdentityr   BaseDiscriminatorr:   r;   )r<   r   r   r   r   r#   
pqmf_paramd_paramsrA   r   rC   r*      s$   
#"
z!StyleMelGANDiscriminator.__init__rL   rF   c                 C   s&   g }t | jD ]	}|| |7 }q|S )zCalculate forward propagation.

        Args:
            x (Tensor): Input tensor (B, 1, T).

        Returns:
            List: List of discriminator outputs, #items in the list will be
                equal to repeats * #discriminators.

        )ranger   _forward)r<   rL   outs_r(   r(   rC   rN   (  s   z StyleMelGANDiscriminator.forwardc           	      C   s   g }t t| j| j| jD ]6\}\}}}tj|d| }|d d d d ||| f }|dkr7||}n|	|}|||g7 }q|S )Nr   )
	enumerater   r   r   r   r2   randomrandintrJ   analysis)	r<   rL   r   idxr   pqmfdisc	start_idxx_r(   r(   rC   r   9  s   

z!StyleMelGANDiscriminator._forwardc                 C   rO   )r]   rP   c                 S   r^   r_   r`   rW   r(   r(   rC   rc   L  rd   zFStyleMelGANDiscriminator.apply_weight_norm.<locals>._apply_weight_normNrY   re   r(   r(   rC   r:   I  r\   z*StyleMelGANDiscriminator.apply_weight_normc                 C   rO   )rf   rP   c                 S   rg   rh   ri   rW   r(   r(   rC   rm   X  rn   zDStyleMelGANDiscriminator.reset_parameters.<locals>._reset_parametersNrY   ro   r(   r(   rC   r;   U  r\   z)StyleMelGANDiscriminator.reset_parameters)r   r   r   r   r1   r   r   r   r   r   r*   r+   r   rN   r   r:   r;   r   r(   r(   rA   rC   r      sH    


>r   )r   r   rR   rs   typingr   r   r   r   numpyr2   r+   torch.nn.functionalr,   
functionalrx   espnet2.gan_tts.melganr   r   espnet2.gan_tts.melgan.pqmfr   +espnet2.gan_tts.style_melgan.tade_res_blockr   rZ   r	   r   r(   r(   r(   rC   <module>   s    P