o
    si"                     @   s   d dl mZ d dlZd dlmZ d dlmZ ddlmZm	Z	 d dl
Z
ddlmZ ddlmZ G d	d
 d
ejZG dd dejZdS )    )ceilN)MultiheadAttention   )activationsnorms   )has_arg)DualPathProcessingc                       s2   e Zd ZdZ				d
 fdd	Zdd	 Z  ZS )ImprovedTransformedLayera  
    Improved Transformer module as used in [1].
    It is Multi-Head self-attention followed by LSTM, activation and linear projection layer.

    Args:
        embed_dim (int): Number of input channels.
        n_heads (int): Number of attention heads.
        dim_ff (int): Number of neurons in the RNNs cell state.
            Defaults to 256. RNN here replaces standard FF linear layer in plain Transformer.
        dropout (float, optional): Dropout ratio, must be in [0,1].
        activation (str, optional): activation function applied at the output of RNN.
        bidirectional (bool, optional): True for bidirectional Inter-Chunk RNN
            (Intra-Chunk is always bidirectional).
        norm (str, optional): Type of normalization to use.

    References
        [1] Chen, Jingjing, Qirong Mao, and Dong Liu. "Dual-Path Transformer
        Network: Direct Context-Aware Modeling for End-to-End Monaural Speech Separation."
        arXiv (2020).
            reluTgLNc           	         s   t t|   t|||d| _t|| _tj|||dd| _	|r%d| n|}t
||| _t| | _t||| _t||| _d S )N)dropoutT)bidirectionalbatch_firstr   )superr
   __init__r   mhannDropoutr   LSTM	recurrentLinearlinearr   get
activationr   norm_mhanorm_ff)	self	embed_dimn_headsdim_ffr   r   r   normff_inner_dim	__class__ M/home/ubuntu/.local/lib/python3.10/site-packages/asteroid/masknn/attention.pyr   "   s   
z!ImprovedTransformedLayer.__init__c                 C   s   | ddd}| |||d }| | ddd| }| |}| | | | |ddd }| |dd| }| |S )Nr   r   r   )	permuter   r   r   r   r   r   	transposer   )r   xtomhaoutr&   r&   r'   forward7   s   
(
z ImprovedTransformedLayer.forward)r   r   Tr   )__name__
__module____qualname____doc__r   r.   __classcell__r&   r&   r$   r'   r
      s    r
   c                       sF   e Zd ZdZ											
d fdd	Zdd Zdd Z  ZS )DPTransformera  Dual-path Transformer introduced in [1].

    Args:
        in_chan (int): Number of input filters.
        n_src (int): Number of masks to estimate.
        n_heads (int): Number of attention heads.
        ff_hid (int): Number of neurons in the RNNs cell state.
            Defaults to 256.
        chunk_size (int): window size of overlap and add processing.
            Defaults to 100.
        hop_size (int or None): hop size (stride) of overlap and add processing.
            Default to `chunk_size // 2` (50% overlap).
        n_repeats (int): Number of repeats. Defaults to 6.
        norm_type (str, optional): Type of normalization to use.
        ff_activation (str, optional): activation function applied at the output of RNN.
        mask_act (str, optional): Which non-linear function to generate mask.
        bidirectional (bool, optional): True for bidirectional Inter-Chunk RNN
            (Intra-Chunk is always bidirectional).
        dropout (float, optional): Dropout ratio, must be in [0,1].

    References
        [1] Chen, Jingjing, Qirong Mao, and Dong Liu. "Dual-Path Transformer
        Network: Direct Context-Aware Modeling for End-to-End Monaural Speech Separation."
        arXiv (2020).
          d   N   r   r   Tr   c                    s  t t|   || _|| _|| _|| _|| _|d ur|n|d }|| _|| _	|| _|| _
|	| _|
| _|| _|| _t| j| j | j | _| j| j dkrltd| j d| j d| j d| j d	 t| j| j| _nd | _t|| j| _t| j| j| _tg | _t| j	D ]*}| jtt| j| j| j| j| jd| j
t| j| j| j| j| j| j| j
g qt | j|| j d	}t!t" || _#t!t$| j| jd	t% | _&t!t$| j| jd	t' | _(t)|
}t*|d
r|d	d| _+d S | | _+d S )Nr   r   zDPTransformer input dim (z,) is not a multiple of the number of heads (z;). Adding extra linear layer at input to accomodate (size [z x z])Tr   dim)r9   ),r   r4   r   in_chann_srcr    ff_hid
chunk_sizehop_size	n_repeats	norm_typeff_activationmask_actr   r   r   
mha_in_dimwarningswarnr   r   input_layerr   r   in_normr	   ola
ModuleListlayersrangeappendr
   Conv2d
SequentialPReLU	first_outConv1dTanhnet_outSigmoidnet_gater   r   
output_act)r   r:   r;   r    r<   r=   r>   r?   r@   rA   rB   r   r   r+   net_out_convmask_nl_classr$   r&   r'   r   a   s~   
	  

zDPTransformer.__init__c                 C   s   | j dur|  |dddd}| |}|jd }| j|}| \}}| _}tt	| j
D ]}| j
| \}}| j||}| j||}q2| |}	|	|| j | j| j|}	| jj|	|d}	| |	| |	 }	|	|| j| jd}	| |	}
|
S )zForward.

        Args:
            mixture_w (:class:`torch.Tensor`): Tensor of shape $(batch, nfilters, nframes)$

        Returns:
            :class:`torch.Tensor`: estimated mask of shape $(batch, nsrc, nfilters, nframes)$
        Nr   r   r(   )output_size)rF   r*   rG   shaperH   unfoldsizer=   rK   lenrJ   intra_processinter_processrP   reshaper;   r:   foldrS   rU   rV   )r   	mixture_wn_orig_framesbatch	n_filtersn_chunks	layer_idxintrainteroutputest_maskr&   r&   r'   r.      s"   
	



zDPTransformer.forwardc                 C   s:   | j | j| j| j| j| j| j| j| j| j	| j
| jd}|S )Nr:   r<   r    r=   r>   r?   r;   r@   rA   rB   r   r   rl   )r   configr&   r&   r'   
get_config   s   zDPTransformer.get_config)
r5   r6   r7   Nr8   r   r   r   Tr   )r/   r0   r1   r2   r   r.   rn   r3   r&   r&   r$   r'   r4   F   s    V r4   )mathr   rD   torch.nnr   torch.nn.modules.activationr    r   r   torchutilsr   dsp.overlap_addr	   Moduler
   r4   r&   r&   r&   r'   <module>   s    :