o
    ´©iî6  ã                   @   sÒ   d dl Z d dlZd dlZd dlZd dlm  mZ d dl	m
Z
 d dl	mZ d dlmZ d dlmZ d dlmZ G dd„ dejjƒZG d	d
„ d
ejjƒZdd„ ZG dd„ dejjƒZG dd„ dejjƒZdd„ ZdS )é    N)Úpack_padded_sequence)Úpad_packed_sequence)Úget_vgg2l_odim)Úmake_pad_mask)Ú	to_devicec                       ó,   e Zd ZdZd‡ fdd„	Zd	dd„Z‡  ZS )
ÚRNNPa†  RNN with projection layer module

    :param int idim: dimension of inputs
    :param int elayers: number of encoder layers
    :param int cdim: number of rnn units (resulted in cdim * 2 if bidirectional)
    :param int hdim: number of projection units
    :param np.ndarray subsample: list of subsampling numbers
    :param float dropout: dropout rate
    :param str typ: The RNN type
    Úblstmc                    sê   t t| ƒ ¡  |d dk}tj |¡D ]M}	|	dkr|}
n|}
d|v r&tjjntjj	}||
|d|dd}t
| d|r:dnd	|	f |ƒ |rSt
| d
|	 tj d| |¡ƒ qt
| d
|	 tj ||¡ƒ q|| _|| _|| _|| _|| _|| _d S )Nr   ÚbÚlstmé   T)Ú
num_layersÚbidirectionalÚbatch_firstz%s%dÚbirnnÚrnnúbt%dé   )Úsuperr   Ú__init__ÚsixÚmovesÚrangeÚtorchÚnnÚLSTMÚGRUÚsetattrÚLinearÚelayersÚcdimÚ	subsampleÚtypÚbidirÚdropout)ÚselfÚidimr   r    Úhdimr!   r$   r"   r#   ÚiÚinputdimÚRNNr   ©Ú	__class__© ú]/home/ubuntu/.local/lib/python3.10/site-packages/funasr/models/language_model/rnn/encoders.pyr      s$    
zRNNP.__init__Nc                    s‚  t  | jjd t|ƒ ¡ g }tj | j¡D ]¥}t	|t
jƒs#t
 |¡}t|| ¡ dd}t| | jr3dndt|ƒ ƒ}| ¡  |durI|jrIt|ƒ}|||du rQdn|| d\}}	| |	¡ t|dd\}
}| j|d  ‰ ˆ dkrˆ|
dd…ddˆ …f }
t
 ‡ fd	d
„|D ƒ¡}t| d| ƒ}||
 ¡  d|
 d¡¡ƒ}| |
 d¡|
 d¡d¡}|| jd k r»t
 tj|| jd¡}q|||fS )aX  RNNP forward

        :param torch.Tensor xs_pad: batch of padded input sequences (B, Tmax, idim)
        :param torch.Tensor ilens: batch of lengths of input sequences (B)
        :param torch.Tensor prev_state: batch of previous RNN states
        :return: batch of hidden state sequences (B, Tmax, hdim)
        :rtype: torch.Tensor
        ú input lengths: T©r   r   r   N©Úhxr   c                    s   g | ]
}t |d  ƒˆ  ‘qS ©r   )Úint)Ú.0r(   ©Úsubr-   r.   Ú
<listcomp>P   s    z RNNP.forward.<locals>.<listcomp>r   éÿÿÿÿr   r   )Úp)ÚloggingÚdebugr,   Ú__name__Ústrr   r   r   r   Ú
isinstancer   ÚTensorÚtensorr   ÚcpuÚgetattrr#   Úflatten_parametersr   Úreset_backward_rnn_stateÚappendr   r!   Ú
contiguousÚviewÚsizeÚtanhÚFr$   )r%   Úxs_padÚilensÚ
prev_stateÚelayer_statesÚlayerÚxs_packr   ÚysÚstatesÚys_padÚprojection_layerÚ	projectedr-   r6   r.   Úforward6   s0   	
 
€
zRNNP.forward©r	   ©N©r=   Ú
__module__Ú__qualname__Ú__doc__r   rW   Ú__classcell__r-   r-   r+   r.   r      s    r   c                       r   )
r*   a9  RNN module

    :param int idim: dimension of inputs
    :param int elayers: number of encoder layers
    :param int cdim: number of rnn units (resulted in cdim * 2 if bidirectional)
    :param int hdim: number of final projection units
    :param float dropout: dropout rate
    :param str typ: The RNN type
    r	   c                    sˆ   t t| ƒ ¡  |d dk}d|v rtjj|||d||dntjj|||d||d| _|r7tj |d |¡| _	ntj ||¡| _	|| _
d S )Nr   r
   r   T)r   r$   r   r   )r   r*   r   r   r   r   r   Únbrnnr   Úl_lastr"   )r%   r&   r   r    r'   r$   r"   r#   r+   r-   r.   r   f   s0   
øú	úö
zRNN.__init__Nc           	   
   C   sÌ   t  | jjd t|ƒ ¡ t|tjƒst |¡}t	|| 
¡ dd}| j ¡  |dur2| jjr2t|ƒ}| j||d\}}t|dd\}}t |  | ¡  d| d¡¡¡¡}| | d¡| d	¡d¡}|||fS )
aV  RNN forward

        :param torch.Tensor xs_pad: batch of padded input sequences (B, Tmax, D)
        :param torch.Tensor ilens: batch of lengths of input sequences (B)
        :param torch.Tensor prev_state: batch of previous RNN states
        :return: batch of hidden state sequences (B, Tmax, eprojs)
        :rtype: torch.Tensor
        r/   Tr0   Nr1   r9   r   r   r   )r;   r<   r,   r=   r>   r?   r   r@   rA   r   rB   r_   rD   r   rE   r   rJ   r`   rG   rH   rI   )	r%   rL   rM   rN   rQ   rR   rS   rT   rV   r-   r-   r.   rW   ‚   s   	

"
zRNN.forwardrX   rY   rZ   r-   r-   r+   r.   r*   [   s    
r*   c                 C   s<   t | ttfƒr| D ]	}d|ddd…< q	| S d| ddd…< | S )zeSets backward BRNN states to zeroes

    Useful in processing of sliding windows over the inputs
    ç        r   Nr   )r?   ÚlistÚtuple)rS   Ústater-   r-   r.   rE   Ÿ   s   ÿrE   c                       s*   e Zd ZdZd‡ fdd„	Zdd„ Z‡  ZS )ÚVGG2LzIVGG-like module

    :param int in_channel: number of input channels
    r   c                    sx   t t| ƒ ¡  tjj|ddddd| _tjjdddddd| _tjjdddddd| _tjjdddddd| _	|| _
d S )Né@   é   r   )ÚstrideÚpaddingé€   )r   re   r   r   r   ÚConv2dÚconv1_1Úconv1_2Úconv2_1Úconv2_2Ú
in_channel)r%   rp   r+   r-   r.   r   ²   s   
zVGG2L.__init__c                 K   s`  t  | jjd t|ƒ ¡ | | d¡| d¡| j| d¡| j ¡ dd¡}t	 
|  |¡¡}t	 
|  |¡¡}t	j|dddd}t	 
|  |¡¡}t	 
|  |¡¡}t	j|dddd}t |¡rd| ¡  ¡ }ntj|tjd}tjt |d ¡tjd}tjt tj|tjdd ¡tjd ¡ }| dd¡}| ¡  | d¡| d¡| d¡| d¡ ¡}||d	fS )
a%  VGG2L forward

        :param torch.Tensor xs_pad: batch of padded input sequences (B, Tmax, D)
        :param torch.Tensor ilens: batch of lengths of input sequences (B)
        :return: batch of padded hidden state sequences (B, Tmax // 4, 128 * D // 4)
        :rtype: torch.Tensor
        r/   r   r   r   T)rh   Ú	ceil_mode)Údtyperg   N)r;   r<   r,   r=   r>   rH   rI   rp   Ú	transposerK   Úrelurl   rm   Ú
max_pool2drn   ro   r   Ú	is_tensorrB   ÚnumpyÚnpÚarrayÚfloat32ÚceilÚint64ÚtolistrG   )r%   rL   rM   Úkwargsr-   r-   r.   rW   ¼   s2   üû
*"ÿ
zVGG2L.forwardr3   rZ   r-   r-   r+   r.   re   ¬   s    
re   c                       r   )
ÚEncodera×  Encoder module

    :param str etype: type of encoder network
    :param int idim: number of dimensions of encoder network
    :param int elayers: number of layers of encoder network
    :param int eunits: number of lstm units of encoder network
    :param int eprojs: number of projection units of encoder network
    :param np.ndarray subsample: list of subsampling numbers
    :param float dropout: dropout rate
    :param int in_channel: number of input channels
    r   c	           
         sZ  t t| ƒ ¡  | d¡ d¡}	|	dvrt d¡ | d¡rn|d dkrGtj	 
t|ƒtt||d||||||	dg¡| _t d|	 ¡  d	 ¡ n"tj	 
t|ƒtt||d|||||	dg¡| _t d|	 ¡  d
 ¡ d| _d S |d dkrtj	 
t|||||||	dg¡| _t |	 ¡ d ¡ ntj	 
t||||||	dg¡| _t |	 ¡ d ¡ d| _d S )NÚvggr:   )r   Úgrur	   Úbgruz:Error: need to specify an appropriate encoder architecturer9   )rp   )r"   zUse CNN-VGG + zP for encoderz for encoderé   z( with every-layer projection for encoderz without projection for encoderr   )r   r   r   ÚlstripÚrstripr;   ÚerrorÚ
startswithr   r   Ú
ModuleListre   r   r   ÚencÚinfoÚupperr*   Úconv_subsampling_factor)
r%   Úetyper&   r   ÚeunitsÚeprojsr!   r$   rp   r"   r+   r-   r.   r   õ   sX   


ùþÿ
úþÿ
ÿÿ
zEncoder.__init__Nc           	      C   sŒ   |du rdgt | jƒ }t |ƒt | jƒksJ ‚g }t| j|ƒD ]\}}||||d\}}}| |¡ qt|t|ƒ d¡ƒ}| |d¡||fS )an  Encoder forward

        :param torch.Tensor xs_pad: batch of padded input sequences (B, Tmax, D)
        :param torch.Tensor ilens: batch of lengths of input sequences (B)
        :param torch.Tensor prev_state: batch of previous encoder hidden states (?, ...)
        :return: batch of hidden state sequences (B, Tmax, eprojs)
        :rtype: torch.Tensor
        N)rN   r9   ra   )Úlenr‰   ÚziprF   r   r   Ú	unsqueezeÚmasked_fill)	r%   rL   rM   Úprev_statesÚcurrent_statesÚmodulerN   rS   Úmaskr-   r-   r.   rW   )  s   	zEncoder.forwardr3   rY   rZ   r-   r-   r+   r.   r   è   s    4r   c              
   C   s¤   t | ddƒ}|dkrt| j|| j| j| j|| jƒS |dkrKtj 	¡ }t
|ƒD ]"}t| j| || | j| | j| | j|| | j| ƒ}| |¡ q&|S td |¡ƒ‚)a.  Instantiates an encoder module given the program arguments

    :param Namespace args: The arguments
    :param int or List of integer idim: dimension of input, e.g. 83, or
                                        List of dimensions of inputs, e.g. [83,83]
    :param List or List of List subsample: subsample factors, e.g. [1,2,2,1,1], or
                                        List of subsample factors of each encoder.
                                         e.g. [[1,2,2,1,1], [1,2,2,1,1]]
    :rtype torch.nn.Module
    :return: The encoder module
    Únum_encsr   z0Number of encoders needs to be more than one. {})rC   r   r   r   rŽ   r   Údropout_rater   r   rˆ   r   rF   Ú
ValueErrorÚformat)Úargsr&   r!   r˜   Úenc_listÚidxr‰   r-   r-   r.   Úencoder_forA  s4   ù	
ù	rŸ   )r;   rw   rx   r   r   Útorch.nn.functionalr   Ú
functionalrK   Útorch.nn.utils.rnnr   r   Úfunasr.metrics.commonr   Ú*funasr.models.transformer.utils.nets_utilsr   r   ÚModuler   r*   rE   re   r   rŸ   r-   r-   r-   r.   Ú<module>   s     LD<Y