o
    ¡¿¯ir6  ã                   @   s²   d Z ddlZddlZddlZddlZddlZddlm  m	Z
 ddlmZ ddd„Zddd„Zd	d
„ ZG dd„ dejƒZG dd„ dejƒZG dd„ dejƒZG dd„ dejƒZdS )zKThis code is based on https://github.com/kan-bayashi/PytorchWaveNetVocoder.é    N)Únné   c                 C   sZ   |d }t  | ¡t  d|t  | ¡  ¡ t  d| ¡ }t  |d d | d ¡ t j¡S )zâPerform mu-law encoding.

    Args:
        x (ndarray): Audio signal with the range from -1 to 1.
        mu (int): Quantized level.

    Returns:
        ndarray: Quantized audio signal with the range from 0 to mu - 1.

    é   é   ç      à?)ÚnpÚsignÚlogÚabsÚfloorÚastypeÚint64)ÚxÚmuÚfx© r   úW/home/ubuntu/.local/lib/python3.10/site-packages/espnet/nets/pytorch_backend/wavenet.pyÚencode_mu_law   s   0"r   c                 C   sD   |d }| d | d d }t  |¡| d| t  |¡ d  }|S )zâPerform mu-law decoding.

    Args:
        x (ndarray): Quantized audio signal with the range from 0 to mu - 1.
        mu (int): Quantized level.

    Returns:
        ndarray: Audio signal with the range from -1 to 1.

    r   r   r   )r   r   r
   )Úyr   r   r   r   r   r   Údecode_mu_law"   s   $r   c                 C   s^   t | tjƒrtj | j¡ tj | jd¡ t | tjƒr-tj | jd¡ tj | jd¡ dS dS )z^Initilize conv layers with xavier.

    Args:
        m (torch.nn.Module): Torch module.

    g        g      ð?N)	Ú
isinstancer   ÚConv1dÚinitÚxavier_uniform_ÚweightÚ	constant_ÚbiasÚConvTranspose2d)Úmr   r   r   Ú
initialize3   s   þr   c                       s(   e Zd ZdZ‡ fdd„Zdd„ Z‡  ZS )ÚOneHotz]Convert to one-hot vector.

    Args:
        depth (int): Dimension of one-hot vector.

    c                    s   t t| ƒ ¡  || _d S ©N)Úsuperr    Ú__init__Údepth)Úselfr$   ©Ú	__class__r   r   r#   K   s   
zOneHot.__init__c                 C   sD   || j  }t |d¡}| | d¡| d¡| j ¡ ¡ }| d|d¡S )zØCalculate forward propagation.

        Args:
            x (LongTensor): long tensor variable with the shape  (B, T)

        Returns:
            Tensor: float tensor variable with the shape (B, depth, T)

        r   r   r   )r$   ÚtorchÚ	unsqueezeÚ	new_zerosÚsizeÚfloatÚscatter_)r%   r   Úx_onehotr   r   r   ÚforwardO   s   

 zOneHot.forward©Ú__name__Ú
__module__Ú__qualname__Ú__doc__r#   r/   Ú__classcell__r   r   r&   r   r    C   s    r    c                       s*   e Zd ZdZd‡ fdd„	Zdd„ Z‡  ZS )	ÚCausalConv1dz1D dilated causal convolution.r   Tc                    sT   t t| ƒ ¡  || _|| _|| _|| _|d |  | _}tj	||||||d| _
d S )Nr   )ÚpaddingÚdilationr   )r"   r6   r#   Úin_channelsÚout_channelsÚkernel_sizer8   r7   r   r   Úconv)r%   r9   r:   r;   r8   r   r7   r&   r   r   r#   c   s   úzCausalConv1d.__init__c                 C   s6   |   |¡}| jdkr|dd…dd…d| j …f }|S )zÑCalculate forward propagation.

        Args:
            x (Tensor): Input tensor with the shape (B, in_channels, T).

        Returns:
            Tensor: Tensor with the shape (B, out_channels, T)

        r   N)r<   r7   ©r%   r   r   r   r   r/   s   s   


zCausalConv1d.forward)r   Tr0   r   r   r&   r   r6   `   s    r6   c                       s*   e Zd ZdZd‡ fdd„	Zdd„ Z‡  ZS )Ú
UpSamplingziUpsampling layer with deconvolution.

    Args:
        upsampling_factor (int): Upsampling factor.

    Tc                    sB   t t| ƒ ¡  || _|| _tjddd| jfd| jf| jd| _d S )Nr   )r;   Ústrider   )r"   r>   r#   Úupsampling_factorr   r   r   r<   )r%   r@   r   r&   r   r   r#   ‹   s   ûzUpSampling.__init__c                 C   s   |  d¡}|  |¡}| d¡S )zßCalculate forward propagation.

        Args:
            x (Tensor): Input tensor with the shape  (B, C, T)

        Returns:
            Tensor: Tensor with the shape (B, C, T') where T' = T * upsampling_factor.

        r   )r)   r<   Úsqueezer=   r   r   r   r/   —   s   



zUpSampling.forward)Tr0   r   r   r&   r   r>   ƒ   s    r>   c                       sd   e Zd ZdZ								d‡ fd	d
„	Zdd„ Zddd„Zdd„ Zdd„ Zdd„ Z	dd„ Z
‡  ZS )ÚWaveNeta;  Conditional wavenet.

    Args:
        n_quantize (int): Number of quantization.
        n_aux (int): Number of aux feature dimension.
        n_resch (int): Number of filter channels for residual block.
        n_skipch (int): Number of filter channels for skip connection.
        dilation_depth (int): Number of dilation depth
            (e.g. if set 10, max dilation = 2^(10-1)).
        dilation_repeat (int): Number of dilation repeat.
        kernel_size (int): Filter size of dilated causal convolution.
        upsampling_factor (int): Upsampling factor.

    r   é   é   é
   é   r   r   c	           
         sÖ  t t| ƒ ¡  || _|| _|| _|| _|| _|| _|| _	|| _
dd„ t| jƒD ƒ| j	 | _| jd t| jƒ d | _t| jƒ| _t| j| j| jƒ| _| j
dkrUt| j
ƒ| _t ¡ | _t ¡ | _t ¡ | _t ¡ | _t ¡ | _t ¡ | _| jD ]^}	|  jt| j| j| j|	ƒg7  _|  jt| j| j| j|	ƒg7  _|  jt | j| jd¡g7  _|  jt | j| jd¡g7  _|  jt | j| jd¡g7  _|  jt | j| jd¡g7  _qvt | j| jd¡| _t | j| jd¡| _d S )Nc                 S   s   g | ]}d | ‘qS )r   r   )Ú.0Úir   r   r   Ú
<listcomp>Ë   s    ÿz$WaveNet.__init__.<locals>.<listcomp>r   r   ) r"   rB   r#   Ún_auxÚ
n_quantizeÚn_reschÚn_skipchr;   Údilation_depthÚdilation_repeatr@   ÚrangeÚ	dilationsÚsumÚreceptive_fieldr    Úonehotr6   Úcausalr>   Ú
upsamplingr   Ú
ModuleListÚdil_sigmoidÚdil_tanhÚaux_1x1_sigmoidÚaux_1x1_tanhÚskip_1x1Úres_1x1r   Úconv_post_1Úconv_post_2)
r%   rK   rJ   rL   rM   rN   rO   r;   r@   Údr&   r   r   r#   ¶   sL   ÿþ







ÿÿ zWaveNet.__init__c                 C   s–   |   |¡}| jdkr|  |¡}g }tt| jƒƒD ]'}|  ||| j| | j| | j	| | j
| | j| | j| ¡\}}| |¡ qt|ƒ}|  |¡}|S )a+  Calculate forward propagation.

        Args:
            x (LongTensor): Quantized input waveform tensor with the shape  (B, T).
            h (Tensor): Auxiliary feature tensor with the shape  (B, n_aux, T).

        Returns:
            Tensor: Logits with the shape (B, T, n_quantize).

        r   )Ú_preprocessr@   rV   rP   ÚlenrQ   Ú_residual_forwardrX   rY   rZ   r[   r\   r]   ÚappendrR   Ú_postprocess)r%   r   ÚhÚoutputÚskip_connectionsrH   Úskipr   r   r   r/   í   s&   


ø

zWaveNet.forwardNÚsamplingc                 C   sÖ  t |jƒdks	J ‚t |jƒdkr|jd | jksJ ‚| d¡}| dd¡ d¡}| jdkr2|  |¡}||jd krGt |d||jd  fd¡}| j	| 
d¡ }|dkrit ||dfd| jd ¡}t ||dfd¡}|  |¡}|dd…dd…d| 
d¡…f }g }	g }
t| jƒD ]W\}}|  ||| j| | j| | j| | j| | j| | j| ¡\}}|d| jd  kr½|
 | jd ¡ n|
 |d | jd  ¡ |	 |dd…dd…|
|  d d…f ¡ q‡|d }t ¡ }t|ƒD ]ô}|| j d d d…  d¡}|  |¡}|dd…dd…| 
d¡d f  ¡  d| jd¡}g }g }t| jƒD ]H\}}|  ||| j| | j| | j| | j| | j| | j| ¡\}}tj|	| |gdd}| |dd…dd…|
|  d…f ¡ | |¡ q"|}	t |ƒ}|  !|¡d }|d	kr”tj"|d dd}tj# $|¡}| %¡  d¡}n|d
krŸ| &d¡}n
t' (d¡ t) *d¡ tj||gdd}|durß|d | dkrßt ¡ | | }t' +d|d ||| d | |f ¡ t ¡ }që|| d…  ,¡  -¡ S )a‡  Generate a waveform with fast genration algorithm.

        This generation based on `Fast WaveNet Generation Algorithm`_.

        Args:
            x (LongTensor): Initial waveform tensor with the shape  (T,).
            h (Tensor): Auxiliary feature tensor with the shape  (n_samples + T, n_aux).
            n_samples (int): Number of samples to be generated.
            interval (int, optional): Log interval.
            mode (str, optional): "sampling" or "argmax".

        Return:
            ndarray: Generated quantized waveform (n_samples).

        .. _`Fast WaveNet Generation Algorithm`: https://arxiv.org/abs/1611.09482

        r   r   r   Ú	replicateÚconstantNéÿÿÿÿ)Údimrj   Úargmaxz!mode should be sampling or argmaxz3%d/%d estimated time = %.3f sec (%.3f sec / sample)).rb   ÚshaperJ   r)   Ú	transposer@   rV   ÚFÚpadrS   r+   rK   ra   Ú	enumeraterQ   rc   rX   rY   rZ   r[   r\   r]   rN   rd   r;   ÚtimerP   Ú
contiguousÚviewÚ_generate_residual_forwardr(   ÚcatrR   re   ÚsoftmaxÚdistributionsÚCategoricalÚsamplero   ÚloggingÚerrorÚsysÚexitÚinfoÚcpuÚnumpy)r%   r   rf   Ú	n_samplesÚintervalÚmodeÚn_padrg   Úh_Úoutput_bufferÚbuffer_sizerH   r`   Ú_ÚsamplesÚ
start_timeÚoutput_buffer_nextrh   Újri   Ú	posteriorÚdistr}   Úelapsed_time_per_sampler   r   r   Úgenerate  sž   "



 ø
,
0ø
&



üÿÿ	€zWaveNet.generatec                 C   s    |   |¡ dd¡}|  |¡}|S ©Nr   r   )rT   rq   rU   ©r%   r   rg   r   r   r   ra   ‡  s   
zWaveNet._preprocessc                 C   s4   t  |¡}|  |¡}t  |¡}|  |¡ dd¡}|S r•   )rr   Úrelur^   r_   rq   r–   r   r   r   re   Œ  s
   


zWaveNet._postprocessc	                 C   s\   ||ƒ}	||ƒ}
||ƒ}||ƒ}t  |	| ¡t  |
| ¡ }||ƒ}||ƒ}|| }||fS r!   ©r(   ÚsigmoidÚtanh©r%   r   rf   rX   rY   rZ   r[   r\   r]   Úoutput_sigmoidÚoutput_tanhÚaux_output_sigmoidÚaux_output_tanhrg   ri   r   r   r   rc   “  s   ÿzWaveNet._residual_forwardc	                 C   sž   ||ƒd d …d d …dd …f }	||ƒd d …d d …dd …f }
||ƒ}||ƒ}t  |	| ¡t  |
| ¡ }||ƒ}||ƒ}||d d …d d …dd …f  }||fS )Nrm   r˜   r›   r   r   r   rx   ª  s   ÿz"WaveNet._generate_residual_forward)r   rC   rD   r   rE   rF   r   r   )Nrj   )r1   r2   r3   r4   r#   r/   r”   ra   re   rc   rx   r5   r   r   r&   r   rB   ¦   s"    ÷7
%urB   )r   )r4   r~   r€   ru   r„   r   r(   Útorch.nn.functionalr   Ú
functionalrr   r   r   r   ÚModuler    r6   r>   rB   r   r   r   r   Ú<module>   s   

##