o
    s·¯ii  ã                   @   s:   d dl Z d dlmZ ddlmZmZ G dd„ dejƒZdS )é    Né   )ÚactivationsÚnormsc                       s,   e Zd ZdZd
‡ fdd„	Zddd	„Z‡  ZS )ÚTACa_  Transform-Average-Concatenate inter-microphone-channel permutation invariant communication block [1].

    Args:
        input_dim (int): Number of features of input representation.
        hidden_dim (int, optional): size of hidden layers in TAC operations.
        activation (str, optional): type of activation used. See asteroid.masknn.activations.
        norm_type (str, optional): type of normalization layer used. See asteroid.masknn.norms.

    .. note:: Supports inputs of shape :math:`(batch, mic\_channels, features, chunk\_size, n\_chunks)`
        as in FasNet-TAC. The operations are applied for each element in ``chunk_size`` and ``n_chunks``.
        Output is of same shape as input.

    References
        [1] : Luo, Yi, et al. "End-to-end microphone permutation and number invariant multi-channel
        speech separation." ICASSP 2020.
    é€  ÚpreluÚgLNc                    s‚   t ƒ  ¡  || _t t ||¡t |¡ƒ ¡| _t t ||¡t |¡ƒ ¡| _	t t d| |¡t |¡ƒ ¡| _
t |¡|ƒ| _d S )Né   )ÚsuperÚ__init__Ú
hidden_dimÚnnÚ
SequentialÚLinearr   ÚgetÚinput_tfÚavg_tfÚ	concat_tfr   Únorm)ÚselfÚ	input_dimr   Ú
activationÚ	norm_type©Ú	__class__© úG/home/ubuntu/.local/lib/python3.10/site-packages/asteroid/masknn/tac.pyr      s   
ÿÿÿzTAC.__init__Nc           	   	      sZ  |  ¡ \}}}}}ˆdu rt |g| ¡‰|  | ddddd¡ || | | |¡¡ ||||| j¡‰ ˆ ¡ dkr?ˆ  d¡}n‡ ‡fdd„t	|ƒD ƒ}t 
|d¡}|  | || | | j¡¡}| |||| j¡ d¡ ˆ ¡}t 
ˆ |gd	¡‰ |  ˆ  || | | d	¡¡ ||||d	¡‰ |  ˆ  ddddd¡ || d	||¡¡ ||d	||¡‰ ˆ |7 ‰ ˆ S )
aB  
        Args:
            x: (:class:`torch.Tensor`): Input multi-channel DPRNN features.
                Shape: :math:`(batch, mic\_channels, features, chunk\_size, n\_chunks)`.
            valid_mics: (:class:`torch.LongTensor`): tensor containing effective number of microphones on each batch.
                Batches can be composed of examples coming from arrays with a different
                number of microphones and thus the ``mic_channels`` dimension is padded.
                E.g. torch.tensor([4, 3]) means first example has 4 channels and the second 3.
                Shape:  :math`(batch)`.

        Returns:
            output (:class:`torch.Tensor`): features for each mic_channel after TAC inter-channel processing.
                Shape :math:`(batch, mic\_channels, features, chunk\_size, n\_chunks)`.
        Nr   é   é   r   r	   c              	      s8   g | ]}ˆ |d d …d d …d ˆ| …f   d¡ d¡‘qS )Nr	   r   )ÚmeanÚ	unsqueeze)Ú.0Úb©ÚoutputÚ
valid_micsr   r   Ú
<listcomp>D   s    ,ÿzTAC.forward.<locals>.<listcomp>éÿÿÿÿ)ÚsizeÚtorchÚ
LongTensorr   ÚpermuteÚreshaper   Úmaxr   ÚrangeÚcatr   r    Ú	expand_asr   r   )	r   Úxr%   Ú
batch_sizeÚnmicsÚchannelsÚ
chunk_sizeÚn_chunksÚ	mics_meanr   r#   r   Úforward&   sD   $ÿþÿÿýÿþ ÿþzTAC.forward)r   r   r   )N)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r8   Ú__classcell__r   r   r   r   r      s    r   )r)   Útorch.nnr   Ú r   r   ÚModuler   r   r   r   r   Ú<module>   s    