o
    i                     @   sd   d Z ddlmZmZmZ ddlZddlmZ ddlm	Z	 G dd dej
jZG dd	 d	ej
jZdS )
zhResidual affine coupling modules in VITS.

This code is based on https://github.com/jaywalnut310/vits.

    )OptionalTupleUnionN)FlipFlow)WaveNetc                       s   e Zd ZdZ											dd	ed
edededededededededef fddZ		d dej	dej	de
ej	 dedej	f
ddZ  ZS )!ResidualAffineCouplingBlockav  Residual affine coupling block module.

    This is a module of residual affine coupling block, which used as "Flow" in
    `Conditional Variational Autoencoder with Adversarial Learning for End-to-End
    Text-to-Speech`_.

    .. _`Conditional Variational Autoencoder with Adversarial Learning for End-to-End
        Text-to-Speech`: https://arxiv.org/abs/2006.04558

                        Tin_channelshidden_channelsflowskernel_sizebase_dilationlayersglobal_channelsdropout_rateuse_weight_normbiasuse_only_meanc                    sd   t    tj | _t|D ] }|  jt|||||d|||	|
|dg7  _|  jt g7  _qdS )a  Initilize ResidualAffineCouplingBlock module.

        Args:
            in_channels (int): Number of input channels.
            hidden_channels (int): Number of hidden channels.
            flows (int): Number of flows.
            kernel_size (int): Kernel size for WaveNet.
            base_dilation (int): Base dilation factor for WaveNet.
            layers (int): Number of layers of WaveNet.
            stacks (int): Number of stacks of WaveNet.
            global_channels (int): Number of global channels.
            dropout_rate (float): Dropout rate.
            use_weight_norm (bool): Whether to use weight normalization in WaveNet.
            bias (bool): Whether to use bias paramters in WaveNet.
            use_only_mean (bool): Whether to estimate only mean.

        r   )r   r   r   r   r   stacksr   r   r   r   r   N)	super__init__torchnn
ModuleListr   rangeResidualAffineCouplingLayerr   )selfr   r   r   r   r   r   r   r   r   r   r   i	__class__ Z/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/gan_tts/vits/residual_coupling.pyr      s(   
z$ResidualAffineCouplingBlock.__init__NFxx_maskginversereturnc                 C   sL   |s| j D ]}|||||d\}}q|S t| j D ]
}|||||d}q|S )a}  Calculate forward propagation.

        Args:
            x (Tensor): Input tensor (B, in_channels, T).
            x_lengths (Tensor): Length tensor (B,).
            g (Optional[Tensor]): Global conditioning tensor (B, global_channels, 1).
            inverse (bool): Whether to inverse the flow.

        Returns:
            Tensor: Output tensor (B, in_channels, T).

        )r)   r*   )r   reversed)r!   r'   r(   r)   r*   flow_r%   r%   r&   forwardR   s   
z#ResidualAffineCouplingBlock.forward)r   r   r	   r
   r   r	   r   r   TTTNF)__name__
__module____qualname____doc__intfloatboolr   r   Tensorr   r/   __classcell__r%   r%   r#   r&   r      sb    	
8r   c                       s   e Zd ZdZ											dded	ed
ededededededededef fddZ		ddej	dej	de
ej	 dedeej	eej	ej	f f f
ddZ  ZS ) r    zResidual affine coupling layer.r   r
   r   r   r   Tr   r   r   r   r   r   r   r   r   r   r   c                    s  |d dks
J dt    |d | _|| _tj| j|d| _tdi ddddd|d	|d
|d|d|ddd|d d|d|d|d|
d|	dddddddd| _	|rltj|| jd| _
ntj|| jd d| _
| j
jj  | j
jj  dS )a  Initialzie ResidualAffineCouplingLayer module.

        Args:
            in_channels (int): Number of input channels.
            hidden_channels (int): Number of hidden channels.
            kernel_size (int): Kernel size for WaveNet.
            base_dilation (int): Base dilation factor for WaveNet.
            layers (int): Number of layers of WaveNet.
            stacks (int): Number of stacks of WaveNet.
            global_channels (int): Number of global channels.
            dropout_rate (float): Dropout rate.
            use_weight_norm (bool): Whether to use weight normalization in WaveNet.
            bias (bool): Whether to use bias paramters in WaveNet.
            use_only_mean (bool): Whether to estimate only mean.

           r   z$in_channels should be divisible by 2r   r   r   out_channelsr   r   r   r   residual_channelsaux_channelsgate_channelsskip_channelsr   r   r   r   use_first_convFuse_last_convscale_residualscale_skip_connectTNr%   )r   r   half_channelsr   r   r   Conv1d
input_convr   encoderprojweightdatazero_r   )r!   r   r   r   r   r   r   r   r   r   r   r   r#   r%   r&   r   q   sv   

	
z$ResidualAffineCouplingLayer.__init__NFr'   r(   r)   r*   r+   c                 C   s   |j |dd dd\}}| || }| j|||d}| || }| js6|j |dd dd\}	}
n|}	t|	}
|s^|	|t|
 |  }t	||gd}t
|
ddg}||fS ||	 t|
  | }t	||gd}|S )a  Calculate forward propagation.

        Args:
            x (Tensor): Input tensor (B, in_channels, T).
            x_lengths (Tensor): Length tensor (B,).
            g (Optional[Tensor]): Global conditioning tensor (B, global_channels, 1).
            inverse (bool): Whether to inverse the flow.

        Returns:
            Tensor: Output tensor (B, in_channels, T).
            Tensor: Log-determinant tensor for NLL (B,) if not inverse.

        r   r:   )dim)r)   )splitsizerF   rG   rH   r   r   
zeros_likeexpcatsum)r!   r'   r(   r)   r*   xaxbhstatsmlogslogdetr%   r%   r&   r/      s    
z#ResidualAffineCouplingLayer.forward)r   r   r
   r   r
   r   r   r   TTTr0   )r1   r2   r3   r4   r5   r6   r7   r   r   r8   r   r   r   r/   r9   r%   r%   r#   r&   r    n   sb    	
Pr    )r4   typingr   r   r   r   espnet2.gan_tts.vits.flowr   espnet2.gan_tts.wavenetr   r   Moduler   r    r%   r%   r%   r&   <module>   s   \