o
    ¡¿¯il#  ã                   @   sz   d Z ddlZddlm  mZ ddlmZmZ ddl	m
Z
 G dd„ dejjƒZG dd„ dejjƒZG d	d
„ d
ejjƒZdS )zCBHG related modules.é    N)Úpack_padded_sequenceÚpad_packed_sequence)Úmake_non_pad_maskc                       s*   e Zd ZdZd‡ fdd„	Zdd„ Z‡  ZS )ÚCBHGLosszLoss function module for CBHG.Tc                    s   t t| ƒ ¡  || _dS )z†Initialize CBHG loss module.

        Args:
            use_masking (bool): Whether to mask padded part in loss calculation.

        N)Úsuperr   Ú__init__Úuse_masking)Úselfr   ©Ú	__class__© ú^/home/ubuntu/.local/lib/python3.10/site-packages/espnet/nets/pytorch_backend/tacotron2/cbhg.pyr      s   
zCBHGLoss.__init__c                 C   sP   | j rt|ƒ d¡ |j¡}| |¡}| |¡}t ||¡}t ||¡}||fS )a†  Calculate forward propagation.

        Args:
            cbhg_outs (Tensor): Batch of CBHG outputs (B, Lmax, spc_dim).
            spcs (Tensor): Batch of groundtruth of spectrogram (B, Lmax, spc_dim).
            olens (LongTensor): Batch of the lengths of each sequence (B,).

        Returns:
            Tensor: L1 loss value
            Tensor: Mean square error loss value.

        éÿÿÿÿ)	r   r   Ú	unsqueezeÚtoÚdeviceÚmasked_selectÚFÚl1_lossÚmse_loss)r	   Ú	cbhg_outsÚspcsÚolensÚmaskÚcbhg_l1_lossÚcbhg_mse_lossr   r   r   Úforward   s   

zCBHGLoss.forward)T©Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r   Ú__classcell__r   r   r
   r   r      s    
r   c                       sP   e Zd ZdZ							d‡ fdd„	Zd	d
„ Zdd„ Zdd„ Zdd„ Z‡  Z	S )ÚCBHGa`  CBHG module to convert log Mel-filterbanks to linear spectrogram.

    This is a module of CBHG introduced
    in `Tacotron: Towards End-to-End Speech Synthesis`_.
    The CBHG converts the sequence of log Mel-filterbanks into linear spectrogram.

    .. _`Tacotron: Towards End-to-End Speech Synthesis`:
         https://arxiv.org/abs/1703.10135

    é   é€   é   é   é   c
                    s  t t| ƒ ¡  || _|| _|| _|| _|| _|| _|| _	|| _
|	| _tj ¡ | _td| jd ƒD ]D}
|
d dkr?|
d d }n|
d d |
d d d f}|  jtj tj |d¡tjj|| j|
ddddtj | j¡tj ¡ ¡g7  _q0tj tj dd¡tjjddd¡| _tj tjj| j| j | j| jd| jd d ddtj | j¡tj ¡ tjj| j| j| jd| jd d ddtj | j¡¡| _tj ¡ | _|  jtj || j
¡g7  _t| j	ƒD ]}|  jt| j
ƒg7  _qàtjj| j
|	d dddd	| _tjj|	|dd
| _dS )a  Initialize CBHG module.

        Args:
            idim (int): Dimension of the inputs.
            odim (int): Dimension of the outputs.
            conv_bank_layers (int, optional): The number of convolution bank layers.
            conv_bank_chans (int, optional): The number of channels in convolution bank.
            conv_proj_filts (int, optional):
                Kernel size of convolutional projection layer.
            conv_proj_chans (int, optional):
                The number of channels in convolutional projection layer.
            highway_layers (int, optional): The number of highway network layers.
            highway_units (int, optional): The number of highway network units.
            gru_units (int, optional): The number of GRU units (for both directions).

        é   é   r   g        T)ÚstrideÚpaddingÚbias)r   r)   )r+   )Ú
num_layersÚbatch_firstÚbidirectional)r-   N)r   r#   r   ÚidimÚodimÚconv_bank_layersÚconv_bank_chansÚconv_proj_filtsÚconv_proj_chansÚhighway_layersÚhighway_unitsÚ	gru_unitsÚtorchÚnnÚ
ModuleListÚ	conv_bankÚrangeÚ
SequentialÚConstantPad1dÚConv1dÚBatchNorm1dÚReLUÚ	MaxPool1dÚmax_poolÚprojectionsÚhighwaysÚLinearÚ
HighwayNetÚGRUÚgruÚoutput)r	   r1   r2   r3   r4   r5   r6   r7   r8   r9   Úkr,   Ú_r
   r   r   r   C   sz   ÿú
ÿÿ
úúíû	zCBHG.__init__c           	      C   s"  |  dd¡}g }t| jƒD ]}|| j| |ƒg7 }qtj|dd}|  |¡}|  |¡  dd¡}|  dd¡| }t| jd ƒD ]	}| j	| |ƒ}q>|  
||¡\}}}| d¡}t|tjƒsat |¡}t|| ¡ dd}| j ¡  |  |¡\}}t|d|d\}}|  |||¡\}}|  |¡}||fS )a|  Calculate forward propagation.

        Args:
            xs (Tensor): Batch of the padded sequences of inputs (B, Tmax, idim).
            ilens (LongTensor): Batch of lengths of each input sequence (B,).

        Return:
            Tensor: Batch of the padded sequence of outputs (B, Tmax, odim).
            LongTensor: Batch of lengths of each output sequence (B,).

        r)   r*   )ÚdimT)r/   )r/   Útotal_length)Ú	transposer>   r3   r=   r:   ÚcatrE   rF   r7   rG   Ú_sort_by_lengthÚsizeÚ
isinstanceÚTensorÚtensorr   ÚcpurK   Úflatten_parametersr   Ú_revert_sort_by_lengthrL   )	r	   ÚxsÚilensÚconvsrM   ÚiÚsort_idxrP   rN   r   r   r   r   ª   s*   




zCBHG.forwardc                 C   sH   t | ¡ ƒdks
J ‚| d¡}| | d¡g¡ ¡ }|  ||¡d d S )z¦Inference.

        Args:
            x (Tensor): The sequences of inputs (T, idim).

        Return:
            Tensor: The sequence of outputs (T, odim).

        r*   r   )ÚlenrT   r   ÚnewÚlongr   )r	   Úxr[   r\   r   r   r   Ú	inferenceÖ   s   

zCBHG.inferencec                 C   s$   |j ddd\}}|| || |fS )Nr   T)Ú
descending©Úsort)r	   r[   r\   Ú
sort_ilensr_   r   r   r   rS   æ   s   zCBHG._sort_by_lengthc                 C   s   |  d¡\}}|| || fS )Nr   rf   )r	   r[   r\   r_   rN   Ú
revert_idxr   r   r   rZ   ê   s   zCBHG._revert_sort_by_length)r$   r%   r&   r'   r(   r%   r'   )
r   r   r    r!   r   r   rd   rS   rZ   r"   r   r   r
   r   r#   7   s    ög,r#   c                       s(   e Zd ZdZ‡ fdd„Zdd„ Z‡  ZS )rI   z§Highway Network module.

    This is a module of Highway Network introduced in `Highway Networks`_.

    .. _`Highway Networks`: https://arxiv.org/abs/1505.00387

    c                    sX   t t| ƒ ¡  || _tj tj ||¡tj ¡ ¡| _	tj tj ||¡tj 
¡ ¡| _dS )zlInitialize Highway Network module.

        Args:
            idim (int): Dimension of the inputs.

        N)r   rI   r   r1   r:   r;   r?   rH   rC   Ú
projectionÚSigmoidÚgate)r	   r1   r
   r   r   r   ø   s   ÿ$zHighwayNet.__init__c                 C   s(   |   |¡}|  |¡}|| |d|   S )zÚCalculate forward propagation.

        Args:
            x (Tensor): Batch of inputs (B, ..., idim).

        Returns:
            Tensor: Batch of outputs, which are the same shape as inputs (B, ..., idim).

        g      ð?)rj   rl   )r	   rc   Úprojrl   r   r   r   r     s   


zHighwayNet.forwardr   r   r   r
   r   rI   ï   s    rI   )r!   r:   Útorch.nn.functionalr;   Ú
functionalr   Útorch.nn.utils.rnnr   r   Ú&espnet.nets.pytorch_backend.nets_utilsr   ÚModuler   r#   rI   r   r   r   r   Ú<module>   s   ' 9