o
    i5+                     @   s   d dl Z d dlZd dlmZ d dlmZ d dlZ	d dl
Z
d dlmZ d dlmZ d dlmZ G dd dejZG dd dejZG d	d
 d
ejZG dd dejZdd ZdS )    N)cuda)_subsamplex)get_vgg2l_odimc                       *   e Zd ZdZd fdd	Zdd Z  ZS )RNNPa  RNN with projection layer module.

    Args:
        idim (int): Dimension of inputs.
        elayers (int): Number of encoder layers.
        cdim (int): Number of rnn units. (resulted in cdim * 2 if bidirectional)
        hdim (int): Number of projection units.
        subsample (np.ndarray): List to use sabsample the input array.
        dropout (float): Dropout rate.
        typ (str): The RNN type.

    blstmc              
      s  t t|   |d dk}|rd|v rtjntj}	n
d|v r!tjntj}	|r(dnd}
|  = t	j
|D ].}|dkr>|}n|}|rFd| n|}t| d|
||	d||| t| d	| t|| q5W d    n1 snw   Y  || _|
| _|| _|| _|| _|| _d S )
Nr   blstmbirnnrnn   z{}{:d}   zbt%d)superr   __init__LNStepBiLSTM
NStepBiGRU	NStepLSTMNStepGRU
init_scopesixmovesrangesetattrformatLinearelayers	rnn_labelcdim	subsampletypbidir)selfidimr   r   hdimr   dropoutr    r!   r   r   iinputdim_cdim	__class__ \/home/ubuntu/.local/lib/python3.10/site-packages/espnet/nets/chainer_backend/rnn/encoders.pyr      s0   

zRNNP.__init__c                 C   s  t | jjd t|  tj| jD ]N}d| j	v r,| | j
t|  dd|\}}}n| | j
t|  d|\}}t|| j|d  \}}| dt|  t|}tj|t|dd dd}qtjtt|t|dd dd}t|ts|g}||fS )	aI  RNNP forward.

        Args:
            xs (chainer.Variable): Batch of padded character ids. (B, Tmax)
            ilens (chainer.Variable): Batch of length of each input batch. (B,)

        Returns:
            xs (chainer.Variable):subsampled vector of xs.
            chainer.Variable: Subsampled vector of ilens.

         input lengths: r	   Nr   btr   axis)logginginfor*   __name__strr   r   r   r   r    r   r   r   Fvstack
split_axisnpcumsumtanh
isinstancetuple)r"   xsilenslayer_ysr+   r+   r,   __call__9   s   
" *
zRNNP.__call__)r   r4   
__module____qualname____doc__r   rC   __classcell__r+   r+   r)   r,   r      s    r   c                       r   )RNNa  RNN Module.

    Args:
        idim (int): Dimension of the imput.
        elayers (int): Number of encoder layers.
        cdim (int): Number of rnn units.
        hdim (int): Number of projection units.
        dropout (float): Dropout rate.
        typ (str): Rnn type.

    r	   c           
         s   t t|   |d dk}|rd|v rtjntj}n
d|v r!tjntj}|r*d| n|}	|   |||||| _	t
|	|| _W d    n1 sJw   Y  || _|| _d S )Nr   r   r	   r   )r   rI   r   r   r   r   r   r   r   nbrnnr   l_lastr    r!   )
r"   r#   r   r   r$   r%   r    r!   r   r(   r)   r+   r,   r   j   s   

zRNN.__init__c                 C   s   t | jjd t|  t|}d| jv r"| dd|\}}}n| d|\}}| 	t
|}t
j|t|dd dd}t
jt
t
|t|dd dd}t|ts^|g}||fS )aR  BRNN forward propagation.

        Args:
            xs (chainer.Variable): Batch of padded character ids. (B, Tmax)
            ilens (chainer.Variable): Batch of length of each input batch. (B,)

        Returns:
            tuple(chainer.Variable): Tuple of `chainer.Variable` objects.
            chainer.Variable: `ilens` .

        r-   r	   Nr/   r   r0   )r2   r3   r*   r4   r5   r   to_cpur    rJ   rK   r6   r7   r8   r9   r:   r;   r<   r=   )r"   r>   r?   rA   rB   r+   r+   r,   rC   x   s   

*
zRNN.__call__)r	   rD   r+   r+   r)   r,   rI   ]   s    rI   c                       r   )VGG2LzXVGG motibated cnn layers.

    Args:
        in_channel (int): Number of channels.

    r   c                    s   t t|   |  4 tj|ddddd| _tjdddddd| _tjdddddd| _tjdddddd| _	W d    n1 sBw   Y  || _
d S )N@      r   )stridepad   )r   rM   r   r   r   Convolution2Dconv1_1conv1_2conv2_1conv2_2
in_channel)r"   rX   r)   r+   r,   r      s   

zVGG2L.__init__c              	      sl  t | jjd t   ttj	d j	d | j
j	d | j
 ddt| t| tjdddt| t| tjddd| jj| j| jj tjdd tjd | jj| j| jj tjdd tjd tddj	d j	d j	d j	d   fdd	tt D  fS )
aO  VGG2L forward propagation.

        Args:
            xs (chainer.Variable): Batch of padded character ids. (B, Tmax)
            ilens (chainer.Variable): Batch of length of each features. (B,)

        Returns:
            chainer.Variable: Subsampled vector of xs.
            chainer.Variable: Subsampled vector of ilens.

        r-   r   r   r   )rP   )dtyperO   c                    s&   g | ]}|d  | d d f qS )Nr+   ).0r&   r?   r>   r+   r,   
<listcomp>   s   & z"VGG2L.__call__.<locals>.<listcomp>)r2   r3   r*   r4   r5   r6   pad_sequenceswapaxesreshapeshaperX   relurT   rU   max_pooling_2drV   rW   xparrayceilr9   float32int32r   lenr"   r>   r?   r+   r[   r,   rC      s8   
  *zVGG2L.__call__r   rD   r+   r+   r)   r,   rM      s    rM   c                       s,   e Zd ZdZ	d fdd	Zdd Z  ZS )Encodera  Encoder network class.

    Args:
        etype (str): Type of encoder network.
        idim (int): Number of dimensions of encoder network.
        elayers (int): Number of layers of encoder network.
        eunits (int): Number of lstm units of encoder network.
        eprojs (int): Number of projection units of encoder network.
        subsample (np.array): Subsampling number. e.g. 1_2_2_2_1
        dropout (float): Dropout rate.

    r   c	           
         s  t t|   |dd}	|	dvrtd |   |drn|d dkrJt	
t|tt||d||||||	d| _td|	  d	  n t	
t|tt||d|||||	d| _td|	  d
  d| _nA|d dkrt	
t|||||||	d| _t|	 d  nt	
t||||||	d| _t|	 d  d| _W d    d S W d    d S 1 sw   Y  d S )Nvggp)r	   grur   bgruz:Error: need to specify an appropriate encoder architecturer/   )rX   )r    zUse CNN-VGG + zP for encoderz for encoder   z( with every-layer projection for encoderz without projection for encoderr   )r   rk   r   lstriprstripr2   errorr   
startswithchainer
SequentialrM   r   r   encr3   upperrI   conv_subsampling_factor)
r"   etyper#   r   eunitseprojsr   r%   rX   r    r)   r+   r,   r      s`   





"zEncoder.__init__c                 C   s   |  ||\}}||fS )aE  Encoder forward.

        Args:
            xs (chainer.Variable): Batch of padded character ids. (B, Tmax)
            ilens (chainer.variable): Batch of length of each features. (B,)

        Returns:
            chainer.Variable: Output of the encoder.
            chainer.Variable: (Subsampled) vector of ilens.

        )rw   ri   r+   r+   r,   rC   $  s   zEncoder.__call__rj   rD   r+   r+   r)   r,   rk      s
    4rk   c                 C   s   t | j|| j| j| j|| jS )zReturn the Encoder module.

    Args:
        idim (int): Dimension of input array.
        subsample (numpy.array): Subsample number. egs).1_2_2_2_1

    Return
        chainer.nn.Module: Encoder module.

    )rk   rz   r   r{   r|   dropout_rate)argsr#   r   r+   r+   r,   encoder_for5  s   r   )r2   ru   chainer.functions	functionsr6   chainer.linkslinksr   numpyr9   r   r   &espnet.nets.chainer_backend.nets_utilsr   espnet.nets.e2e_asr_commonr   Chainr   rI   rM   rk   r   r+   r+   r+   r,   <module>   s    N=HS