o
    i]                     @   s   d dl mZ d dlmZmZmZmZ d dlZd dlm	Z
 d dlmZ d dlmZ d dlmZ e
eje
dkZG d	d
 d
eZdS )    )OrderedDict)DictListOptionalTupleN)parse)
FaSNet_TAC)iFaSNet)AbsSeparatorz1.9.0c                       s   e Zd Z			ddededededed	ed
ededededededef fddZ	ddej	dej	de
e deeej	 ej	ef fddZedd Z  ZS )FaSNetSeparator        >  F	input_dimenc_dimfeature_dim
hidden_dimlayersegment_sizenum_spkwin_lencontext_lenfasnet_typedropoutsrpredict_noisec                    sd   t    || _|| _|
dv sJ d|
dkrtnt}|||||||r'|d n|||	||d
| _dS )a  Filter-and-sum Network (FaSNet) Separator

        Args:
            input_dim: required by AbsSeparator. Not used in this model.
            enc_dim: encoder dimension
            feature_dim: feature dimension
            hidden_dim: hidden dimension in DPRNN
            layer: number of DPRNN blocks in iFaSNet
            segment_size: dual-path segment size
            num_spk: number of speakers
            win_len: window length in millisecond
            context_len: context length in millisecond
            fasnet_type: 'fasnet' or 'ifasnet'.
                Select from origin fasnet or Implicit fasnet
            dropout: dropout rate. Default is 0.
            sr: samplerate of input audio
            predict_noise: whether to output the estimated noise signal
        )fasnetifasnetzonly support fasnet and ifasnetr      )
r   r   r   r   r   nspkr   r   r   r   N)super__init___num_spkr   r   r	   r   )selfr   r   r   r   r   r   r   r   r   r   r   r   r   FASNET	__class__ Z/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/enh/separator/fasnet_separator.pyr       s"   
"zFaSNetSeparator.__init__Ninputilens
additionalreturnc                 C   st   |  dks
J d|ddd}tjd|jd}| ||}t|jdd}i }| jr5|^ }}||d< |||fS )	a  Forward.

        Args:
            input (torch.Tensor): (Batch, samples, channels)
            ilens (torch.Tensor): input lengths [Batch]
            additional (Dict or None): other data included in model
                NOTE: not used in this model

        Returns:
            separated (List[Union(torch.Tensor, ComplexTensor)]): [(B, T, N), ...]
            ilens (torch.Tensor): (B,)
            others predicted data, e.g. masks: OrderedDict[
                'mask_spk1': torch.Tensor(Batch, Frames, Freq),
                'mask_spk2': torch.Tensor(Batch, Frames, Freq),
                ...
                'mask_spkn': torch.Tensor(Batch, Frames, Freq),
            ]
           z4only support input shape: (Batch, samples, channels)r      r   )dtype)dimnoise1)	r/   permutetorchzerosr.   r   listunbindr   )r"   r(   r)   r*   none_mic	separatedothersnoiser&   r&   r'   forwardG   s   

zFaSNetSeparator.forwardc                 C   s   | j S N)r!   )r"   r&   r&   r'   r   r   s   zFaSNetSeparator.num_spk)r   r   Fr;   )__name__
__module____qualname__intstrfloatboolr    r2   Tensorr   r   r   r   r   r:   propertyr   __classcell__r&   r&   r$   r'   r      sV    	
<
+r   )collectionsr   typingr   r   r   r   r2   packaging.versionr   Vespnet2.enh.layers.fasnetr   espnet2.enh.layers.ifasnetr	   #espnet2.enh.separator.abs_separatorr
   __version__is_torch_1_9_plusr   r&   r&   r&   r'   <module>   s    