o
    ¡¿¯ig7  ã                   @   sÂ   d dl Z d dlZd dlZd dlZd dlm  mZ d dl	m
Z
mZ d dlmZ d dlmZmZ G dd„ dejjƒZG dd„ dejjƒZd	d
„ ZG dd„ dejjƒZG dd„ dejjƒZdd„ ZdS )é    N)Úpack_padded_sequenceÚpad_packed_sequence)Úget_vgg2l_odim)Úmake_pad_maskÚ	to_devicec                       ó,   e Zd ZdZd‡ fdd„	Zd	dd„Z‡  ZS )
ÚRNNPa†  RNN with projection layer module

    :param int idim: dimension of inputs
    :param int elayers: number of encoder layers
    :param int cdim: number of rnn units (resulted in cdim * 2 if bidirectional)
    :param int hdim: number of projection units
    :param np.ndarray subsample: list of subsampling numbers
    :param float dropout: dropout rate
    :param str typ: The RNN type
    Úblstmc                    sê   t t| ƒ ¡  |d dk}tj |¡D ]M}	|	dkr|}
n|}
d|v r&tjjntjj	}||
|d|dd}t
| d|r:dnd	|	f |ƒ |rSt
| d
|	 tj d| |¡ƒ qt
| d
|	 tj ||¡ƒ q|| _|| _|| _|| _|| _|| _d S )Nr   ÚbÚlstmé   T)Ú
num_layersÚbidirectionalÚbatch_firstz%s%dÚbirnnÚrnnúbt%dé   )Úsuperr   Ú__init__ÚsixÚmovesÚrangeÚtorchÚnnÚLSTMÚGRUÚsetattrÚLinearÚelayersÚcdimÚ	subsampleÚtypÚbidirÚdropout)ÚselfÚidimr   r    Úhdimr!   r$   r"   r#   ÚiÚinputdimÚRNNr   ©Ú	__class__© ú\/home/ubuntu/.local/lib/python3.10/site-packages/espnet/nets/pytorch_backend/rnn/encoders.pyr      s(   
ÿ 
zRNNP.__init__Nc                    sˆ  t  | jjd t|ƒ ¡ g }tj | j¡D ]¨}t	|t
jƒs#t
 |¡}t|| ¡ dd}t| | jr3dndt|ƒ ƒ}| jrA| ¡  |durL|jrLt|ƒ}|||du rTdn|| d\}}	| |	¡ t|dd\}
}| j|d  ‰ ˆ dkr‹|
dd…ddˆ …f }
t
 ‡ fd	d
„|D ƒ¡}t| d| ƒ}||
 ¡  d|
 d¡¡ƒ}| |
 d¡|
 d¡d¡}|| jd k r¾t
 tj|| jd¡}q|||fS )aX  RNNP forward

        :param torch.Tensor xs_pad: batch of padded input sequences (B, Tmax, idim)
        :param torch.Tensor ilens: batch of lengths of input sequences (B)
        :param torch.Tensor prev_state: batch of previous RNN states
        :return: batch of hidden state sequences (B, Tmax, hdim)
        :rtype: torch.Tensor
        ú input lengths: T©r   r   r   N©Úhxr   c                    s   g | ]
}t |d  ƒˆ  ‘qS ©r   )Úint)Ú.0r(   ©Úsubr-   r.   Ú
<listcomp>S   s    z RNNP.forward.<locals>.<listcomp>r   éÿÿÿÿr   r   )Úp)ÚloggingÚdebugr,   Ú__name__Ústrr   r   r   r   Ú
isinstancer   ÚTensorÚtensorr   ÚcpuÚgetattrr#   ÚtrainingÚflatten_parametersr   Úreset_backward_rnn_stateÚappendr   r!   Ú
contiguousÚviewÚsizeÚtanhÚFr$   )r%   Úxs_padÚilensÚ
prev_stateÚelayer_statesÚlayerÚxs_packr   ÚysÚstatesÚys_padÚprojection_layerÚ	projectedr-   r6   r.   Úforward6   s6   	

ÿ
€
zRNNP.forward©r	   ©N©r=   Ú
__module__Ú__qualname__Ú__doc__r   rX   Ú__classcell__r-   r-   r+   r.   r      s    r   c                       r   )
r*   a9  RNN module

    :param int idim: dimension of inputs
    :param int elayers: number of encoder layers
    :param int cdim: number of rnn units (resulted in cdim * 2 if bidirectional)
    :param int hdim: number of final projection units
    :param float dropout: dropout rate
    :param str typ: The RNN type
    r	   c                    sˆ   t t| ƒ ¡  |d dk}d|v rtjj|||d||dntjj|||d||d| _|r7tj |d |¡| _	ntj ||¡| _	|| _
d S )Nr   r
   r   T)r   r$   r   r   )r   r*   r   r   r   r   r   Únbrnnr   Úl_lastr"   )r%   r&   r   r    r'   r$   r"   r#   r+   r-   r.   r   i   s0   
øú	úö
zRNN.__init__Nc           	   
   C   sÒ   t  | jjd t|ƒ ¡ t|tjƒst |¡}t	|| 
¡ dd}| jr)| j ¡  |dur5| jjr5t|ƒ}| j||d\}}t|dd\}}t |  | ¡  d| d¡¡¡¡}| | d¡| d	¡d¡}|||fS )
aV  RNN forward

        :param torch.Tensor xs_pad: batch of padded input sequences (B, Tmax, D)
        :param torch.Tensor ilens: batch of lengths of input sequences (B)
        :param torch.Tensor prev_state: batch of previous RNN states
        :return: batch of hidden state sequences (B, Tmax, eprojs)
        :rtype: torch.Tensor
        r/   Tr0   Nr1   r9   r   r   r   )r;   r<   r,   r=   r>   r?   r   r@   rA   r   rB   rD   r`   rE   r   rF   r   rK   ra   rH   rI   rJ   )	r%   rM   rN   rO   rR   rS   rT   rU   rW   r-   r-   r.   rX   …   s   	

ÿ
zRNN.forwardrY   rZ   r[   r-   r-   r+   r.   r*   ^   s    
r*   c                 C   s<   t | ttfƒr| D ]	}d|ddd…< q	| S d| ddd…< | S )zeSets backward BRNN states to zeroes

    Useful in processing of sliding windows over the inputs
    ç        r   Nr   )r?   ÚlistÚtuple)rT   Ústater-   r-   r.   rF   ¥   s   ÿrF   c                       s*   e Zd ZdZd‡ fdd„	Zdd„ Z‡  ZS )ÚVGG2LzIVGG-like module

    :param int in_channel: number of input channels
    r   c                    sx   t t| ƒ ¡  tjj|ddddd| _tjjdddddd| _tjjdddddd| _tjjdddddd| _	|| _
d S )Né@   é   r   )ÚstrideÚpaddingé€   )r   rf   r   r   r   ÚConv2dÚconv1_1Úconv1_2Úconv2_1Úconv2_2Ú
in_channel)r%   rq   r+   r-   r.   r   ¸   s   
zVGG2L.__init__c                 K   s`  t  | jjd t|ƒ ¡ | | d¡| d¡| j| d¡| j ¡ dd¡}t	 
|  |¡¡}t	 
|  |¡¡}t	j|dddd}t	 
|  |¡¡}t	 
|  |¡¡}t	j|dddd}t |¡rd| ¡  ¡ }ntj|tjd}tjt |d ¡tjd}tjt tj|tjdd ¡tjd ¡ }| dd¡}| ¡  | d¡| d¡| d¡| d¡ ¡}||d	fS )
a%  VGG2L forward

        :param torch.Tensor xs_pad: batch of padded input sequences (B, Tmax, D)
        :param torch.Tensor ilens: batch of lengths of input sequences (B)
        :return: batch of padded hidden state sequences (B, Tmax // 4, 128 * D // 4)
        :rtype: torch.Tensor
        r/   r   r   r   T)ri   Ú	ceil_mode)Údtyperh   N)r;   r<   r,   r=   r>   rI   rJ   rq   Ú	transposerL   Úrelurm   rn   Ú
max_pool2dro   rp   r   Ú	is_tensorrB   ÚnumpyÚnpÚarrayÚfloat32ÚceilÚint64ÚtolistrH   )r%   rM   rN   Úkwargsr-   r-   r.   rX   Â   s:   üû
ÿþ"ÿ
zVGG2L.forwardr3   r[   r-   r-   r+   r.   rf   ²   s    
rf   c                       s.   e Zd ZdZ	d‡ fdd„	Zd	dd„Z‡  ZS )
ÚEncodera×  Encoder module

    :param str etype: type of encoder network
    :param int idim: number of dimensions of encoder network
    :param int elayers: number of layers of encoder network
    :param int eunits: number of lstm units of encoder network
    :param int eprojs: number of projection units of encoder network
    :param np.ndarray subsample: list of subsampling numbers
    :param float dropout: dropout rate
    :param int in_channel: number of input channels
    r   c	           
         sZ  t t| ƒ ¡  | d¡ d¡}	|	dvrt d¡ | d¡rn|d dkrGtj	 
t|ƒtt||d||||||	dg¡| _t d|	 ¡  d	 ¡ n"tj	 
t|ƒtt||d|||||	dg¡| _t d|	 ¡  d
 ¡ d| _d S |d dkrtj	 
t|||||||	dg¡| _t |	 ¡ d ¡ ntj	 
t||||||	dg¡| _t |	 ¡ d ¡ d| _d S )NÚvggr:   )r   Úgrur	   Úbgruz:Error: need to specify an appropriate encoder architecturer9   )rq   )r"   zUse CNN-VGG + zP for encoderz for encoderé   z( with every-layer projection for encoderz without projection for encoderr   )r   r€   r   ÚlstripÚrstripr;   ÚerrorÚ
startswithr   r   Ú
ModuleListrf   r   r   ÚencÚinfoÚupperr*   Úconv_subsampling_factor)
r%   Úetyper&   r   ÚeunitsÚeprojsr!   r$   rq   r"   r+   r-   r.   r   ý   sX   


ùþÿ
úþÿ
ÿÿ
zEncoder.__init__Nc           	      C   sŒ   |du rdgt | jƒ }t |ƒt | jƒksJ ‚g }t| j|ƒD ]\}}||||d\}}}| |¡ qt|t|ƒ d¡ƒ}| |d¡||fS )an  Encoder forward

        :param torch.Tensor xs_pad: batch of padded input sequences (B, Tmax, D)
        :param torch.Tensor ilens: batch of lengths of input sequences (B)
        :param torch.Tensor prev_state: batch of previous encoder hidden states (?, ...)
        :return: batch of hidden state sequences (B, Tmax, eprojs)
        :rtype: torch.Tensor
        N)rO   r9   rb   )ÚlenrŠ   ÚziprG   r   r   Ú	unsqueezeÚmasked_fill)	r%   rM   rN   Úprev_statesÚcurrent_statesÚmodulerO   rT   Úmaskr-   r-   r.   rX   3  s   	zEncoder.forwardr3   rZ   r[   r-   r-   r+   r.   r€   ð   s
    ÿ6r€   c              
   C   s¤   t | ddƒ}|dkrt| j|| j| j| j|| jƒS |dkrKtj 	¡ }t
|ƒD ]"}t| j| || | j| | j| | j|| | j| ƒ}| |¡ q&|S td |¡ƒ‚)a.  Instantiates an encoder module given the program arguments

    :param Namespace args: The arguments
    :param int or List of integer idim: dimension of input, e.g. 83, or
                                        List of dimensions of inputs, e.g. [83,83]
    :param List or List of List subsample: subsample factors, e.g. [1,2,2,1,1], or
                                        List of subsample factors of each encoder.
                                         e.g. [[1,2,2,1,1], [1,2,2,1,1]]
    :rtype torch.nn.Module
    :return: The encoder module
    Únum_encsr   z0Number of encoders needs to be more than one. {})rC   r€   rŽ   r   r   r   Údropout_rater   r   r‰   r   rG   Ú
ValueErrorÚformat)Úargsr&   r!   r™   Úenc_listÚidxrŠ   r-   r-   r.   Úencoder_forK  s8   ù	
ù	ÿr    )r;   rx   ry   r   r   Útorch.nn.functionalr   Ú
functionalrL   Útorch.nn.utils.rnnr   r   Úespnet.nets.e2e_asr_commonr   Ú&espnet.nets.pytorch_backend.nets_utilsr   r   ÚModuler   r*   rF   rf   r€   r    r-   r-   r-   r.   Ú<module>   s    QG>[