o
    i                     @   st   d dl mZ d dlmZmZmZmZmZ d dlZd dl	m
Z
 d dlmZ d dlmZ d dlmZ G dd	 d	eZdS )
    )OrderedDict)DictListOptionalTupleUnionN)ComplexTensor)
is_complex)SkiM)AbsSeparatorc                       s   e Zd ZdZ										
	d!dededededededededededef fddZ	d"de	e
jef de
jdee deee	e
jef  e
jef fddZedd  Z  ZS )#SkiMSeparatora  Skipping Memory (SkiM) Separator

    Args:
        input_dim: input feature dimension
        causal: bool, whether the system is causal.
        num_spk: number of target speakers.
        nonlinear: the nonlinear function for mask estimation,
            select from 'relu', 'tanh', 'sigmoid'
        layer: int, number of SkiM blocks. Default is 3.
        unit: int, dimension of the hidden state.
        segment_size: segmentation size for splitting long features
        dropout: float, dropout ratio. Default is 0.
        mem_type: 'hc', 'h', 'c', 'id' or None.
            It controls whether the hidden (or cell) state of
            SegLSTM will be processed by MemLSTM.
            In 'id' mode, both the hidden and cell states
            will be identically returned.
            When mem_type is None, the MemLSTM will be removed.
        seg_overlap: Bool, whether the segmentation will reserve 50%
            overlap for adjacent segments. Default is False.
    T   Frelu                 hc	input_dimcausalnum_spkpredict_noise	nonlinearlayerunitsegment_sizedropoutmem_typeseg_overlapc                    s   t    || _|| _|| _|
dvrtd|
| jr!| jd n| j| _t	|||| j |	|| |r4dnd|||
d
| _
|dvrGtd|tj tj tj d| | _d S )	N)r   hcidNzNot supporting mem_type={}   cLNgLN)

input_sizehidden_sizeoutput_sizer   
num_blocksbidirectional	norm_typer   r   r   )sigmoidr   tanhzNot supporting nonlinear={})super__init___num_spkr   r   
ValueErrorformatr   num_outputsr
   skimtorchnnSigmoidReLUTanhr   )selfr   r   r   r   r   r   r   r   r   r   r   	__class__ X/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/enh/separator/skim_separator.pyr.   #   s6   


zSkiMSeparator.__init__Ninputilens
additionalreturnc                    s   t  r	t }n }|j\}}}| |}||||| j}| |jdd}	| jr0|	^ }	}
 fdd|	D }t	t
dd tt|	D |	}| jrR |
 |d< |||fS )a  Forward.

        Args:
            input (torch.Tensor or ComplexTensor): Encoded feature [B, T, N]
            ilens (torch.Tensor): input lengths [Batch]
            additional (Dict or None): other data included in model
                NOTE: not used in this model

        Returns:
            masked (List[Union(torch.Tensor, ComplexTensor)]): [(B, T, N), ...]
            ilens (torch.Tensor): (B,)
            others predicted data, e.g. masks: OrderedDict[
                'mask_spk1': torch.Tensor(Batch, Frames, Freq),
                'mask_spk2': torch.Tensor(Batch, Frames, Freq),
                ...
                'mask_spkn': torch.Tensor(Batch, Frames, Freq),
            ]
        r   )dimc                    s   g | ]} | qS r<   r<   ).0mr>   r<   r=   
<listcomp>{   s    z)SkiMSeparator.forward.<locals>.<listcomp>c                 S   s   g | ]	}d  |d qS )z
mask_spk{}r"   )r1   )rC   ir<   r<   r=   rF   ~   s    noise1)r	   absshaper3   viewr2   r   unbindr   r   ziprangelen)r9   r>   r?   r@   featureBTN	processedmasks
mask_noisemaskedothersr<   rE   r=   forwardS   s    



zSkiMSeparator.forwardc                 C   s   | j S N)r/   )r9   r<   r<   r=   r      s   zSkiMSeparator.num_spk)
Tr   Fr   r   r   r   r   r   FrZ   )__name__
__module____qualname____doc__intboolstrfloatr.   r   r4   Tensorr   r   r   r   r   r   rY   propertyr   __classcell__r<   r<   r:   r=   r      s^    	
4
2r   )collectionsr   typingr   r   r   r   r   r4   torch_complex.tensorr    espnet2.enh.layers.complex_utilsr	   espnet2.enh.layers.skimr
   #espnet2.enh.separator.abs_separatorr   r   r<   r<   r<   r=   <module>   s    