o
    iC                     @   s   d dl mZ d dlmZmZmZmZmZ d dlZd dl	m
Z d dlmZ d dlmZ d dlmZ d dlmZ eejed	kZG d
d deZdS )    )OrderedDict)DictListOptionalTupleUnionN)parse)ComplexTensor)
is_complex)TemporalConvNet)AbsSeparatorz1.9.0c                       s   e Zd Z										dd	ed
edededededededededef fddZ	d deej	e
f dej	dee deeeej	e
f  ej	ef fddZedd Z  ZS )!TCNSeparator   F            gLNrelu	input_dimnum_spkpredict_noiselayerstackbottleneck_dim
hidden_dimkernelcausal	norm_type	nonlinearc                    sZ   t    || _|| _|dvrtd|t|||||||r#|d n||
|	|d
| _dS )a  Temporal Convolution Separator

        Args:
            input_dim: input feature dimension
            num_spk: number of speakers
            predict_noise: whether to output the estimated noise signal
            layer: int, number of layers in each stack.
            stack: int, number of stacks
            bottleneck_dim: bottleneck dimension
            hidden_dim: number of convolution channel
            kernel: int, kernel size.
            causal: bool, defalut False.
            norm_type: str, choose from 'BN', 'gLN', 'cLN'
            nonlinear: the nonlinear function for mask estimation,
                       select from 'relu', 'tanh', 'sigmoid'
        )sigmoidr   tanhzNot supporting nonlinear={}   )
NBHPXRCr   r   mask_nonlinearN)super__init___num_spkr   
ValueErrorformatr   tcn)selfr   r   r   r   r   r   r   r   r   r   r   	__class__ W/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/enh/separator/tcn_separator.pyr,      s"   
zTCNSeparator.__init__Ninputilens
additionalreturnc                    s   t  r	t }n }|j\}}}|dd}| |}|dd}| jr/|jdd^ }}	n|jdd} fdd|D }
ttdd t	t
|D |}| jrW |	 |d< |
||fS )	a  Forward.

        Args:
            input (torch.Tensor or ComplexTensor): Encoded feature [B, T, N]
            ilens (torch.Tensor): input lengths [Batch]
            additional (Dict or None): other data included in model
                NOTE: not used in this model

        Returns:
            masked (List[Union(torch.Tensor, ComplexTensor)]): [(B, T, N), ...]
            ilens (torch.Tensor): (B,)
            others predicted data, e.g. masks: OrderedDict[
                'mask_spk1': torch.Tensor(Batch, Frames, Freq),
                'mask_spk2': torch.Tensor(Batch, Frames, Freq),
                ...
                'mask_spkn': torch.Tensor(Batch, Frames, Freq),
            ]
        r"   r   r   )dimc                    s   g | ]} | qS r4   r4   ).0mr6   r4   r5   
<listcomp>k   s    z(TCNSeparator.forward.<locals>.<listcomp>c                 S   s   g | ]	}d  |d qS )z
mask_spk{}r"   )r/   )r;   ir4   r4   r5   r>   n   s    noise1)r
   absshape	transposer0   r   unbindr   ziprangelen)r1   r6   r7   r8   featurer$   Lr#   masks
mask_noisemaskedothersr4   r=   r5   forwardC   s"   


zTCNSeparator.forwardc                 C   s   | j S N)r-   )r1   r4   r4   r5   r   u   s   zTCNSeparator.num_spk)
r   Fr   r   r   r   r   Fr   r   rO   )__name__
__module____qualname__intboolstrr,   r   torchTensorr	   r   r   r   r   r   rN   propertyr   __classcell__r4   r4   r2   r5   r      s\    	
7
2r   )collectionsr   typingr   r   r   r   r   rV   packaging.versionr   Vtorch_complex.tensorr	    espnet2.enh.layers.complex_utilsr
   espnet2.enh.layers.tcnr   #espnet2.enh.separator.abs_separatorr   __version__is_torch_1_9_plusr   r4   r4   r4   r5   <module>   s    