o
    i0                     @   s   d Z ddlZddlZddlmZmZmZ ddlZddl	Z	ddl
mZ ddlmZmZmZ G dd de	jjZG dd	 d	e	jjZdS )
zgParallel WaveGAN Modules.

This code is modified from https://github.com/kan-bayashi/ParallelWaveGAN.

    N)AnyDictOptional)upsample)Conv1d	Conv1d1x1ResidualBlockc                #       s  e Zd ZdZddddddddddd	d
d
d
ddg difdedededededededededededededededeee	f f  fddZ
	 d4d!ejd"eej d#ejfd$d%Zd&d' Zd(d) Zed*d+ fd,d-Zed.d/ Z	 d4d!ejd"eej d#ejfd0d1Zd2d3 Z  ZS )5ParallelWaveGANGeneratorz"Parallel WaveGAN Generator module.         @      P      g        TConvInUpsampleNetworkupsample_scales)   r   r   r   in_channelsout_channelskernel_sizelayersstacksresidual_channelsgate_channelsskip_channelsaux_channelsaux_context_windowdropout_ratebiasuse_weight_normupsample_conditional_featuresupsample_netupsample_paramsc                    sR  t    || _|| _|	| _|
| _|| _|| _|| _|| dks"J || }t	||dd| _
|rR|dkr<||	|
d tt|d
i || _tt|d | _nd| _|| _tj | _t|D ]}d||  }t|||||	|||dd		}|  j|g7  _qbtjtj t	||ddtj t	||ddg| _|r|   | | j dS )a  Initialize ParallelWaveGANGenerator module.

        Args:
            in_channels (int): Number of input channels.
            out_channels (int): Number of output channels.
            kernel_size (int): Kernel size of dilated convolution.
            layers (int): Number of residual block layers.
            stacks (int): Number of stacks i.e., dilation cycles.
            residual_channels (int): Number of channels in residual conv.
            gate_channels (int):  Number of channels in gated conv.
            skip_channels (int): Number of channels in skip conv.
            aux_channels (int): Number of channels for auxiliary feature conv.
            aux_context_window (int): Context window size for auxiliary feature.
            dropout_rate (float): Dropout rate. 0.0 means no dropout applied.
            bias (bool): Whether to use bias parameter in conv layer.
            use_weight_norm (bool): Whether to use weight norm.
                If set to true, it will be applied to all of the conv layers.
            upsample_conditional_features (bool): Whether to use upsampling network.
            upsample_net (str): Upsampling network architecture.
            upsample_params (Dict[str, Any]): Upsampling network parameters.

        r   T)r   r   )r   r   r   Nr   )	r   r   r   r   r   dilationr   r   scale_residual )super__init__r   r   r   r   r   r   r   r   
first_convupdategetattrr   r"   intnpprodupsample_factortorchnn
ModuleListconv_layersranger   ReLUlast_conv_layersapply_weight_norm"_register_load_state_dict_pre_hook_load_state_dict_pre_hook)selfr   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   layers_per_stacklayerr$   conv	__class__r&   e/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/gan_tts/parallel_wavegan/parallel_wavegan.pyr(      s^   
)
z!ParallelWaveGANGenerator.__init__Nczreturnc           
      C   s   |du r|  \}}}t|d|| j j|j|jd}| jdur2| |}| d| dks2J | |}d}| j	D ]}||d|d\}}	||	7 }q<|t
dt| j	 9 }|}| jD ]}||}q]|S )a  Calculate forward propagation.

        Args:
            c (Tensor): Local conditioning auxiliary features (B, C ,T_feats).
            z (Tensor): Input noise signal (B, 1, T_wav).

        Returns:
            Tensor: Output tensor (B, out_channels, T_wav)

        Nr
   )devicedtyper   )xx_maskrA   g      ?)sizer0   randnr/   torD   rE   r"   r)   r3   mathsqrtlenr6   )
r:   rA   rB   b_trG   skipsfhr&   r&   r@   forward   s$   






z ParallelWaveGANGenerator.forwardc                 C       dt jjfdd}| | dS ):Remove weight normalization module from all of the layers.mc                 S   <   zt d|  d tjj|  W d S  ty   Y d S w NzWeight norm is removed from .loggingdebugr0   r1   utilsremove_weight_norm
ValueErrorrX   r&   r&   r@   _remove_weight_norm      zHParallelWaveGANGenerator.remove_weight_norm.<locals>._remove_weight_normNr0   r1   Moduleapplyr:   rc   r&   r&   r@   r`         z+ParallelWaveGANGenerator.remove_weight_normc                 C   rV   )9Apply weight normalization module from all of the layers.rX   c                 S   D   t | tjjst | tjjr tjj|  td|  d d S d S NzWeight norm is applied to r[   	
isinstancer0   r1   r   Conv2dr_   weight_normr]   r^   rb   r&   r&   r@   _apply_weight_norm      zFParallelWaveGANGenerator.apply_weight_norm.<locals>._apply_weight_normNre   r:   rq   r&   r&   r@   r7         z*ParallelWaveGANGenerator.apply_weight_normc                 C   s   d|  S )Nr   r&   )rG   r&   r&   r@   <lambda>   s    z!ParallelWaveGANGenerator.<lambda>c                    sD   | | dksJ | |  fddt | D }|d t| d S )Nr   c                    s   g | ]} | qS r&   r&   ).0ir$   layers_per_cycler&   r@   
<listcomp>   s    zFParallelWaveGANGenerator._get_receptive_field_size.<locals>.<listcomp>r
   )r4   sum)r   r   r   r$   	dilationsr&   rx   r@   _get_receptive_field_size   s   z2ParallelWaveGANGenerator._get_receptive_field_sizec                 C   s   |  | j| j| jS )zReturn receptive field size.)r}   r   r   r   )r:   r&   r&   r@   receptive_field_size   s   z-ParallelWaveGANGenerator.receptive_field_sizec                 C   sF   |dur| ddd}| ddd}| ||d ddS )a  Perform inference.

        Args:
            c (Tensor): Local conditioning auxiliary features (T_feats ,C).
            z (Optional[Tensor]): Input noise signal (T_wav, 1).

        Returns:
            Tensor: Output tensor (T_wav, out_channels)

        Nr
   r   )	transpose	unsqueezerU   squeeze)r:   rA   rB   r&   r&   r@   	inference   s   z"ParallelWaveGANGenerator.inferencec                 C   sb   t | }|D ]&}	d|	|dv r.||	}
||	dd }tj||
gdd||	dd< qdS )z2Apply pre hook function before loading state dict.conv1x1_skip skipoutr   )dimN)listkeysreplacepopr0   cat)r:   
state_dictprefixlocal_metadatastrictmissing_keysunexpected_keys
error_msgsr   kv_skipv_outr&   r&   r@   r9      s   
z2ParallelWaveGANGenerator._load_state_dict_pre_hook)N)__name__
__module____qualname____doc__r,   floatboolstrr   r   r(   r0   Tensorr   rU   r`   r7   staticmethodr}   propertyr~   r   r9   __classcell__r&   r&   r>   r@   r	      s    
	

l
'


r	   c                       s   e Zd ZdZdddddddddid	d	f
d
edededededededeeef dedef fddZ	de
jde
jfddZdd Zdd Z  ZS )ParallelWaveGANDiscriminatorz&Parallel WaveGAN Discriminator module.r
   r   
   r   	LeakyReLUnegative_slopeg?Tr   r   r   r   conv_channelsdilation_factornonlinear_activationnonlinear_activation_paramsr   r    c              	      s  t    |d d dksJ d|dksJ dtj | _|}t|d D ];}|dkr0d}n|dkr6|n|| }|}|d d | }t||||||	dttj|dddi|g}|  j|7  _q'|d d }t|||||	d	}|  j|g7  _|
r| 	  d
S d
S )ag  Initialize ParallelWaveGANDiscriminator module.

        Args:
            in_channels (int): Number of input channels.
            out_channels (int): Number of output channels.
            kernel_size (int): Number of output channels.
            layers (int): Number of conv layers.
            conv_channels (int): Number of chnn layers.
            dilation_factor (int): Dilation factor. For example, if dilation_factor = 2,
                the dilation will be 2, 4, 8, ..., and so on.
            nonlinear_activation (str): Nonlinear function after each conv.
            nonlinear_activation_params (Dict[str, Any]): Nonlinear function parameters
            bias (bool): Whether to use bias parameter in conv.
            use_weight_norm (bool) Whether to use weight norm.
                If set to true, it will be applied to all of the conv layers.

        r
   r   r   z$Not support even number kernel size.zDilation factor must be > 0.)r   paddingr$   r   inplaceT)r   r   r   Nr&   )
r'   r(   r0   r1   r2   r3   r4   r   r+   r7   )r:   r   r   r   r   r   r   r   r   r   r    conv_in_channelsrw   r$   r   
conv_layerlast_conv_layerr>   r&   r@   r(      sL   
z%ParallelWaveGANDiscriminator.__init__rG   rC   c                 C   s   | j D ]}||}q|S )zCalculate forward propagation.

        Args:
            x (Tensor): Input noise signal (B, 1, T).

        Returns:
            Tensor: Output tensor (B, 1, T).

        )r3   )r:   rG   rS   r&   r&   r@   rU   >  s   


z$ParallelWaveGANDiscriminator.forwardc                 C   rV   )rj   rX   c                 S   rk   rl   rm   rb   r&   r&   r@   rq   O  rr   zJParallelWaveGANDiscriminator.apply_weight_norm.<locals>._apply_weight_normNre   rs   r&   r&   r@   r7   L  rt   z.ParallelWaveGANDiscriminator.apply_weight_normc                 C   rV   )rW   rX   c                 S   rY   rZ   r\   rb   r&   r&   r@   rc   Y  rd   zLParallelWaveGANDiscriminator.remove_weight_norm.<locals>._remove_weight_normNre   rh   r&   r&   r@   r`   V  ri   z/ParallelWaveGANDiscriminator.remove_weight_norm)r   r   r   r   r,   r   r   r   r   r(   r0   r   rU   r7   r`   r   r&   r&   r>   r@   r      sH    
	
F
r   )r   r]   rL   typingr   r   r   numpyr-   r0    espnet2.gan_tts.parallel_waveganr   &espnet2.gan_tts.wavenet.residual_blockr   r   r   r1   rf   r	   r   r&   r&   r&   r@   <module>   s    a