import torch.nn as nn

from espnet2.enh.layers.tcn import choose_norm
from espnet.nets.pytorch_backend.nets_utils import get_activation


class ImprovedTransformerLayer(nn.Module):
    """Container module of the (improved) Transformer proposed in [1].

    Reference:
        Dual-path transformer network: Direct context-aware modeling for end-to-end
        monaural speech separation; Chen et al, Interspeech 2020.

    Args:
        rnn_type (str): select from 'RNN', 'LSTM' and 'GRU'.
        input_size (int): Dimension of the input feature.
        att_heads (int): Number of attention heads.
        hidden_size (int): Dimension of the hidden state.
        dropout (float): Dropout ratio. Default is 0.
        activation (str): activation function applied at the output of RNN.
        bidirectional (bool, optional): True for bidirectional Inter-Chunk RNN
            (Intra-Chunk is always bidirectional).
        norm (str, optional): Type of normalization to use.
            reluTgLNc	           
         s   t    | }|dv sJ d| || _|| _tj|||d| _tj|d| _	t
||| _tt|||dd|d| _t|}|rGd| n|}	t|tj|dt|	|| _t
||| _d S )	N)RNNLSTMGRUz4Only support 'RNN', 'LSTM' and 'GRU', current type: )dropout)p   T)batch_firstbidirectional   )super__init__upperrnn_type	att_headsnnMultiheadAttention	self_attnDropoutr   r   	norm_attngetattrrnnr   
SequentialLinearfeed_forwardnorm_ff)
selfr   
input_sizer   hidden_sizer   
activationr   normhdim	__class__ M/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/enh/layers/dptnet.pyr   !   s.   

z!ImprovedTransformerLayer.__init__Nc                 C   s   | ddd}| j||||dd  ddd}| || }| |dddd}| | |d }| || }| |ddddS )Nr   r   r   )	attn_mask)permuter   r   r   	transposer   r   r    )r!   xr+   srcoutout2r)   r)   r*   forwardK   s    z ImprovedTransformerLayer.forward)r   r   Tr   N)__name__
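
# Shape sketch (illustrative, not from the original module): the layer keeps
# the input shape, so with input_size=64 and att_heads=4 one would expect
#
#     >>> import torch
#     >>> layer = ImprovedTransformerLayer("LSTM", 64, 4, 128)
#     >>> layer(torch.randn(2, 100, 64)).shape
#     torch.Size([2, 100, 64])
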

class DPTNet(nn.Module):
    """Dual-path transformer network.

    args:
        rnn_type (str): select from 'RNN', 'LSTM' and 'GRU'.
        input_size (int): dimension of the input feature.
            Input size must be a multiple of `att_heads`.
        hidden_size (int): dimension of the hidden state.
        output_size (int): dimension of the output size.
        att_heads (int): number of attention heads.
        dropout (float): dropout ratio. Default is 0.
        activation (str): activation function applied at the output of RNN.
        num_layers (int): number of stacked RNN layers. Default is 1.
        bidirectional (bool): whether the RNN layers are bidirectional. Default is True.
        norm_type (str): type of normalization to use after each inter- or
            intra-chunk Transformer block.
    """

    def __init__(
        self,
        rnn_type,
        input_size,
        hidden_size,
        output_size,
        att_heads=4,
        dropout=0,
        activation="relu",
        num_layers=1,
        bidirectional=True,
        norm_type="gLN",
    ):
        super().__init__()

        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size = hidden_size

        # dual-path transformer: alternating intra- and inter-chunk blocks
        self.row_transformer = nn.ModuleList()
        self.col_transformer = nn.ModuleList()
        for i in range(num_layers):
            # intra-chunk (row) processing is always bidirectional
            self.row_transformer.append(
                ImprovedTransformerLayer(
                    rnn_type,
                    input_size,
                    att_heads,
                    hidden_size,
                    dropout=dropout,
                    activation=activation,
                    bidirectional=True,
                    norm=norm_type,
                )
            )
            self.col_transformer.append(
                ImprovedTransformerLayer(
                    rnn_type,
                    input_size,
                    att_heads,
                    hidden_size,
                    dropout=dropout,
                    activation=activation,
                    bidirectional=bidirectional,
                    norm=norm_type,
                )
            )

        # output layer
        self.output = nn.Sequential(nn.PReLU(), nn.Conv2d(input_size, output_size, 1))

    def forward(self, input):
        # input shape: (batch, N, chunk_size, n_chunks)
        # apply the Transformer along the intra-chunk dimension first,
        # then along the inter-chunk dimension
        output = input
        for i in range(len(self.row_transformer)):
            output = self.intra_chunk_process(output, i)
            output = self.inter_chunk_process(output, i)
        output = self.output(output)  # (batch, output_size, chunk_size, n_chunks)
        return output
zDPTNet.forwardc                 C   sX   |  \}}}}|dd|| ||}| j| |}|||||dddd}|S )Nr   r,   r      r   )sizer/   reshaper?   r.   r!   r0   layer_indexbatchN
chunk_sizen_chunksr)   r)   r*   rJ      s
   zDPTNet.intra_chunk_processc                 C   s\   |  \}}}}|dddd|| ||}| j| |}|||||dddd}|S )Nr   r   rM   r   )rN   r.   rO   r@   rP   r)   r)   r*   rK      s
   zDPTNet.inter_chunk_process)r<   r   r   r   Tr   )	r6   r7   r8   r9   r   r4   rJ   rK   r:   r)   r)   r'   r*   r;   Y   s    3r;   )	torch.nnr   espnet2.enh.layers.tcnr   &espnet.nets.pytorch_backend.nets_utilsr   Moduler   r;   r)   r)   r)   r*   <module>   s
   K
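

# Illustrative smoke test (a sketch, not part of the original module): shapes
# and hyperparameters below are arbitrary example values; `input_size` must be
# a multiple of `att_heads` for nn.MultiheadAttention.
if __name__ == "__main__":
    import torch

    net = DPTNet("LSTM", input_size=64, hidden_size=128, output_size=32)
    # input: (batch, input_size, chunk_size, n_chunks)
    y = net(torch.randn(2, 64, 50, 20))
    # output: (batch, output_size, chunk_size, n_chunks)
    assert y.shape == (2, 32, 50, 20)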