o
    if(                     @   st   d dl mZ d dlmZmZmZmZmZ d dlZd dl	m
Z
 d dlmZ d dlmZ d dlmZ G dd	 d	eZdS )
    )OrderedDict)DictListOptionalTupleUnionN)ComplexTensor)DNN_Beamformer)DNN_WPE)AbsSeparatorc                G       sD  e Zd Z												
		
		
							
					
	
						
d@dedededededededededededededed ed!ed"ed#ed$ed%ed&ed'ed(ed)ed*ed+ed,ed-ed.ed/ed0ed1ed2ed3ed4efF fd5d6Z	7dAd8ee	j
ef d9e	j
d:ee d;eeee	j
ef  e	j
ef fd<d=Zed>d? Z  ZS )BNeuralBeamformer   mask_mseFblstmp   ,  @             Tcrelusigmoidmvdr_souden   Hz>ư>	input_dimnum_spk	loss_typeuse_wpe	wnet_typewlayerswunitswprojswdropout_ratetapsdelayuse_dnn_mask_for_wpe
wnonlinearmulti_source_wpewnormalizationuse_beamformer	bnet_typeblayersbunitsbprojsbadimref_channeluse_noise_mask
bnonlinearbeamformer_typertf_iterationsbdropout_rateshared_powerdiagonal_loadingdiag_eps_wpediag_eps_bfmask_flooringflooring_thres_wpeflooring_thres_bfuse_torch_solverc$           %         sx  t    || _|| _|dvrtd| || _|| _| jrf|r#d}$nd}$td$i d|d|d|d|d	|d
|	d|
d|d|d|rHdn|d|d|$d|d|d|d| d|!d|#| _nd | _|| _	| jrt
d$i d|d|d|d|d|d|d|d|d
|d|d|d |d!|d"|
d#|d|d|d| d|"d|#| _nd | _|o|| _d S )%N)r   spectrumspectrum_log	magnitudezUnsupported loss type: %sr   r   wtypewidimr!   r"   r#   dropout_rater%   r&   use_dnn_masknmask	nonlinear
iterationsnormalizationr8   diag_epsr;   flooring_thresr>   bidimbtyper-   r.   r/   r   r2   r0   r1   r4   r5   btapsbdelay )super__init___num_spkr   
ValueErrorr+   r   r
   wper1   r	   
beamformerr7   )%selfr   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   rH   	__class__rP   [/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/enh/separator/neural_beamformer.pyrR      s   
)	

	
zNeuralBeamformer.__init__Ninputilens
additionalreturnc                 C   sf  |  dv sJ |  |}t }| jr| jdur| jdr| jrj|  dkr:| j|d|\}}|	d}n|  dkrI| j||\}}|durjt
|trft| jD ]}|| |d|d < qWn||d	< | jr|  dkr| j||\}}t| jD ]}|| |d
|d < qt|| jkr|| j |d< d||fS d}	|  dkr| jr| |d|\}}}}	t
|trdd |D }|durt| jD ]}d|d }
|| ||
< || 	d|d|
 < qn|	d}|dur||d< |	d|d	< n| jrF| ||\}}}}	|durFt
|tr;t| jD ]}d|d }
|| ||
< || |d|
 < q!n||d< |	d|d	< | jr| jjdrk| jjdrk| jrk| jjdkrm| jdkrmd}	t
|trwtd| j|||	d\}}}t| jD ]}|| |d
|d < qt|| jkr|| j |d< t
|ts|g}|||fS )a  Forward.

        Args:
            input (torch.complex64/ComplexTensor):
                mixed speech [Batch, Frames, Channel, Freq]
            ilens (torch.Tensor): input lengths [Batch]
            additional (Dict or None): other data included in model
                NOTE: not used in this model

        Returns:
            enhanced speech (single-channel): List[torch.complex64/ComplexTensor]
            output lengths
            other predcited data: OrderedDict[
                'dereverb1': ComplexTensor(Batch, Frames, Channel, Freq),
                'mask_dereverb1': torch.Tensor(Batch, Frames, Channel, Freq),
                'mask_noise1': torch.Tensor(Batch, Frames, Channel, Freq),
                'mask_spk1': torch.Tensor(Batch, Frames, Channel, Freq),
                'mask_spk2': torch.Tensor(Batch, Frames, Channel, Freq),
                ...
                'mask_spkn': torch.Tensor(Batch, Frames, Channel, Freq),
            ]
        )r      Nmaskr   r_   zmask_dereverb{}r   mask_dereverb1z
mask_spk{}mask_noise1c                 S   s   g | ]}| d qS )ra   )squeeze).0enhrP   rP   rZ   
<listcomp>   s    z,NeuralBeamformer.forward.<locals>.<listcomp>z
dereverb{}mask_	dereverb1wmpdrwpdzJSingle-source WPE is not supported with beamformer in multi-speaker cases.)powers)dimr   trainingr   
startswithr   rU   predict_mask	unsqueezerd   
isinstancelistranger   formatr+   rV   lenr4   r7   rF   NotImplementedError)rW   r[   r\   r]   enhancedothersmask_wspkothers_brl   keyrP   rP   rZ   forward}   s   









zNeuralBeamformer.forwardc                 C   s   | j S N)rS   )rW   rP   rP   rZ   r     s   zNeuralBeamformer.num_spk)"r   r   Fr   r   r   r   r   r   r   Tr   TFTr   r   r   r   r   r   Tr   r   r   r   TTr   r   Fr   r   Tr   )__name__
__module____qualname__intstrboolfloatrR   r   torchTensorr   r   r   r   r   r   r~   propertyr   __classcell__rP   rP   rX   rZ   r      s    	
!"#$%&'t
 	r   )collectionsr   typingr   r   r   r   r   r   torch_complex.tensorr   !espnet2.enh.layers.dnn_beamformerr	   espnet2.enh.layers.dnn_wper
   #espnet2.enh.separator.abs_separatorr   r   rP   rP   rP   rZ   <module>   s    