o
    i@                     @   s   d dl Z d dlmZmZ d dlmZ d dlZd dlm  m	Z
 d dlmZ d dlmZmZmZ d dlmZ eejedkZee jZdd	d
ZG dd deeZG dd deZG dd deZG dd deZG dd deZG dd deZ dS )    N)ABCabstractmethod)reduce)parse)complex_norm
is_complexnew_complex_like)
AbsEnhLossz1.9.0IAMc                    s  |  }|dv sJ d| dg }|d jjk r#fdd|D }|dur6|jjk r6|dj}t|D ];\ d}|d	krk|du rSfd
d|D }nfdd||g D }tdd |}| }n|dkrd}t fddt|D }|dur||7 }t	
dt	|
dt  
|}n|dkrt	t	t  }|jddd}n|dks|dkrt	t  }	t	t  }
|	j|
j |	j|
j  }t	t	t  | }|dkr|jdddn|jddd}nq|dkr.t	t  }	t	t  }
|	j|
j |	j|
j  }t	
dt	
dt  | }|jddd}n6|dkrdj
dj
d t }jj jj  | }jj jj  | }t||g}|dusqJ d| d|| q:|S )aX  Create mask label.

    Args:
        mix_spec: ComplexTensor(B, T, [C,] F)
        ref_spec: List[ComplexTensor(B, T, [C,] F), ...]
        noise_spec: ComplexTensor(B, T, [C,] F)
            only used for IBM and IRM
        mask_type: str
    Returns:
        labels: List[Tensor(B, T, [C,] F), ...] or List[ComplexTensor(B, T, F), ...]
    )IBMIRMr
   PSMNPSMPSM^2CIRMz
mask type z not supportedr   c                    s   g | ]}| d  jqS )   )	unsqueeze	expand_asreal).0r)mix_spec Y/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/enh/loss/criterions/tf_domain.py
<listcomp>,   s    z&_create_mask_label.<locals>.<listcomp>Nr   r   c                       g | ]
}t  t |kqS r   absr   nr   r   r   r   4       c                    r   r   r   r   r    r   r   r   6   r!   c                 S      | | S Nr   xyr   r   r   <lambda>7       z$_create_mask_label.<locals>.<lambda>r   g      ?c                 3   s     | ]\}}| kr|V  qd S r#   r   )r   ir   )idxr   r   	<genexpr>;   s    z%_create_mask_label.<locals>.<genexpr>r
      )minmaxr   r   r   r   )upperndimr   r   r   	enumerater   intsumr   powEPSclampimagr   append)r   ref_spec
noise_spec	mask_type
mask_labelmaskflagsbetares_specphase_r	phase_mix	cos_thetadenominator	mask_real	mask_imagr   )r*   r   r   r   _create_mask_label   sb   

	
(
$
rH   c                       s   e Zd ZdZeedefddZeedefddZ	edefddZ
edefd	d
ZedefddZedefddZ	d fdd	ZdddZ  ZS )FrequencyDomainLossz=Base class for all frequence-domain Enhancement loss modules.returnc                   C      d S r#   r   r   r   r   r   compute_on_maskd      z#FrequencyDomainLoss.compute_on_maskc                   C   rK   r#   r   r   r   r   r   r<   j   rM   zFrequencyDomainLoss.mask_typec                 C      | j S r#   )_nameselfr   r   r   nameo      zFrequencyDomainLoss.namec                 C   rN   r#   )_only_for_testrP   r   r   r   only_for_tests   rS   z!FrequencyDomainLoss.only_for_testc                 C   rN   r#   )_is_noise_lossrP   r   r   r   is_noise_lossw   rS   z!FrequencyDomainLoss.is_noise_lossc                 C   rN   r#   )_is_dereverb_lossrP   r   r   r   is_dereverb_loss{   rS   z$FrequencyDomainLoss.is_dereverb_lossFc                    s:   t    || _|| _|| _|| _|r|rtdd S d S )NzF`is_noise_loss` and `is_dereverb_loss` cannot be True at the same time)super__init__rO   rT   rV   rX   
ValueError)rQ   rR   rU   rW   rY   	__class__r   r   r[      s   
zFrequencyDomainLoss.__init__Nc                 C   s   t |||| jdS )N)r   r:   r;   r<   )rH   r<   )rQ   r   r:   r;   r   r   r   create_mask_label   s   z%FrequencyDomainLoss.create_mask_label)FFFr#   )__name__
__module____qualname____doc__propertyr   boolrL   strr<   rR   rU   rW   rY   r[   r_   __classcell__r   r   r]   r   rI   `   s&    rI   c                       ^   e Zd Z						d fdd	ZedefddZedefd	d
Zde	j
fddZ  ZS )FrequencyDomainMSEFr   Nc                    F   |d ur|}n
|rd| }nd}t  j||||d || _|| _d S )NMSE_on_MSE_on_SpecrU   rW   rY   rZ   r[   _compute_on_mask
_mask_typerQ   rL   r<   rR   rU   rW   rY   rO   r]   r   r   r[         	
zFrequencyDomainMSE.__init__rJ   c                 C   rN   r#   ro   rP   r   r   r   rL      rS   z"FrequencyDomainMSE.compute_on_maskc                 C   rN   r#   rp   rP   r   r   r   r<      rS   zFrequencyDomainMSE.mask_typec                 C   s   |j |j ksJ |j |j f|| }t|r!|jd |jd  }n|d }| dkr5|jddgd}|S | dkrE|jg dd}|S td|j |j )ztime-frequency MSE loss.

        Args:
            ref: (Batch, T, F) or (Batch, T, C, F)
            inf: (Batch, T, F) or (Batch, T, C, F)
        Returns:
            loss: (Batch,)
        r      r,   dim   r,   r   ru   #Invalid input shape: ref={}, inf={})shaper   r   r8   rw   meanr\   format)rQ   refinfdiffmselossr   r   r   forward   s   	zFrequencyDomainMSE.forwardFr   NFFFr`   ra   rb   r[   rd   re   rL   rf   r<   torchTensorr   rg   r   r   r]   r   ri          ri   c                       rh   )FrequencyDomainL1Fr   Nc                    rj   )NL1_on_
L1_on_Specrm   rn   rq   r]   r   r   r[      rr   zFrequencyDomainL1.__init__rJ   c                 C   rN   r#   rs   rP   r   r   r   rL      rS   z!FrequencyDomainL1.compute_on_maskc                 C   rN   r#   rt   rP   r   r   r   r<      rS   zFrequencyDomainL1.mask_typec                 C   s   |j |j ksJ |j |j ft|r-t|j|j t|j|j  t| |   }nt|| }| dkrC|jddgd}|S | dkrS|jg dd}|S td|j |j )ztime-frequency L1 loss.

        Args:
            ref: (Batch, T, F) or (Batch, T, C, F)
            inf: (Batch, T, F) or (Batch, T, C, F)
        Returns:
            loss: (Batch,)
        ru   r,   r   rv   rx   ry   rz   )	r{   r   r   r   r8   rw   r|   r\   r}   )rQ   r~   r   l1lossr   r   r   r      s$   	zFrequencyDomainL1.forwardr   r   r   r   r]   r   r      r   r   c                       s`   e Zd Z							d fdd	Zedefdd	Zedefd
dZde	j
fddZ  ZS )FrequencyDomainDPCLFr   dpclNc           	         s:   |d u rdn|}t  j||||d || _|| _|| _d S )Nr   rm   )rZ   r[   ro   rp   
_loss_type)	rQ   rL   r<   	loss_typerR   rU   rW   rY   rO   r]   r   r   r[     s   

zFrequencyDomainDPCL.__init__rJ   c                 C   rN   r#   rs   rP   r   r   r   rL   +  rS   z#FrequencyDomainDPCL.compute_on_maskc                 C   rN   r#   rt   rP   r   r   r   r<   /  rS   zFrequencyDomainDPCL.mask_typec                    sh  t |dksJ t |}dd |D  | jdkrat d }|d jd }t|D ] fdd D }tdd |}|  }||7 }q*| 	 
 }tj||d}| |d	|}n| jd
kr|d jd }tj||fd	| t||d   |j|jd}	t|D ]|d | t||d   |	 < qtj|d jd |d jd |d jd ||jd}t|D ] fdd D }tdd |}| }|	 ||dk< q| |d	|}n	td| j dtt|dd|djdd}
tt|dd | djdd}tt|dd| djdd}|
| d|  S )a  time-frequency Deep Clustering loss.

        References:
            [1] Deep clustering: Discriminative embeddings for segmentation and
                separation; John R. Hershey. et al., 2016;
                https://ieeexplore.ieee.org/document/7471631
            [2] Manifold-Aware Deep Clustering: Maximizing Angles Between Embedding
                Vectors Based on Regular Simplex; Tanaka, K. et al., 2021;
                https://www.isca-speech.org/archive/interspeech_2021/tanaka21_interspeech.html

        Args:
            ref: List[(Batch, T, F) * spks]
            inf: (Batch, T*F, D)
        Returns:
            loss: (Batch,)
        r   c                 S   s   g | ]}t |qS r   r   r   r   r   r   r   H  s    z/FrequencyDomainDPCL.forward.<locals>.<listcomp>r   c                       g | ]}  |kqS r   r   r   abs_refr)   r   r   r   M      c                 S   r"   r#   r   r$   r   r   r   r'   N  r(   z-FrequencyDomainDPCL.forward.<locals>.<lambda>)num_classesr/   mdcr,   )dtypedevicer   )r   c                    r   r   r   r   r   r   r   r   i  r   c                 S   r"   r#   r   r$   r   r   r   r'   j  r(   zInvalid loss type error: z', the loss type must be "dpcl" or "mdc")r,   r   rv   )lenr   r   
zeros_liker{   ranger   r3   
contiguousflattenlongFone_hotviewfullmathsqrtr   r   zerosr\   matmul	transposer5   r4   float)rQ   r~   r   num_spkr   Br?   r>   remanifold_vectorV2Y2VYr   r   r   r   3  sd   



$(zFrequencyDomainDPCL.forward)Fr   r   NFFFr   r   r   r]   r   r     s    r   c                       s^   e Zd Z						d fdd	ZedefddZedefdd	Zde	j
fd
dZ  ZS )FrequencyDomainAbsCoherenceFNc                    s4   |d u rdn|}t  j||||d d| _d | _d S )NCoherence_on_Specrm   Frn   rq   r]   r   r   r[     s   	
z$FrequencyDomainAbsCoherence.__init__rJ   c                 C   rN   r#   rs   rP   r   r   r   rL     rS   z+FrequencyDomainAbsCoherence.compute_on_maskc                 C   rN   r#   rt   rP   r   r   r   r<     rS   z%FrequencyDomainAbsCoherence.mask_typec                 C   s   |j |j ksJ |j |j ft|rct|rct|ddt|dd |d t }||  jdd | }| dkrGd|jdd }|S | dkrYd|jddgd }|S t	d
|j |j t	d)	aq  time-frequency absolute coherence loss.

        Reference:
            Independent Vector Analysis with Deep Neural Network Source Priors;
            Li et al 2020; https://arxiv.org/abs/2008.11273

        Args:
            ref: (Batch, T, F) or (Batch, T, C, F)
            inf: (Batch, T, F) or (Batch, T, C, F)
        Returns:
            loss: (Batch,)
        r,   rv   ru   g      ?rx   r   rz   z(`ref` and `inf` must be complex tensors.)r{   r   r   sizer6   conjr|   r   rw   r\   r}   )rQ   r~   r   denomcohcoh_lossr   r   r   r     s   $	z#FrequencyDomainAbsCoherence.forward)FNNFFFr   r   r   r]   r   r     s    r   c                       s`   e Zd Z							d fdd	ZedefddZedefd	d
Zde	j
fddZ  ZS )FrequencyDomainCrossEntropyFNc           	         s^   |d ur|}n
|rd| }nd}t  j||||d || _|| _tjj|dd| _|| _d S )NCE_on_
CE_on_Specrm   none)ignore_index	reduction)	rZ   r[   ro   rp   r   nnCrossEntropyLosscross_entropy	ignore_id)	rQ   rL   r<   r   rR   rU   rW   rY   rO   r]   r   r   r[     s"   

z$FrequencyDomainCrossEntropy.__init__rJ   c                 C   rN   r#   rs   rP   r   r   r   rL     rS   z+FrequencyDomainCrossEntropy.compute_on_maskc                 C   rN   r#   rt   rP   r   r   r   r<     rS   z%FrequencyDomainCrossEntropy.mask_typec                 C   sn  |j d |j d kr|j d |j d ksJ |j |j f| dkr2| |ddd|jdd}n#| dkrK| |dddd|jddgd}n
td|j |j t T |	d}|| j
k}||k| d }| dkr|jdd|jdd  }n| dkr|jddgd|jddgd  }d| d	 i| _W d
   |S 1 sw   Y  |S )ztime-frequency cross-entropy loss.

        Args:
            ref: (Batch, T) or (Batch, T, C)
            inf: (Batch, T, nclass) or (Batch, T, C, nclass)
        Returns:
            loss: (Batch,)
        r   r,   r   rv   ru   rz   r/   accd   N)r{   rw   r   permuter|   r\   r}   r   no_gradargmaxr   masked_fillr   r4   cpustats)rQ   r~   r   losspredr>   	numeratorr   r   r   r   r     s0   *	 &


$


z#FrequencyDomainCrossEntropy.forward)FNr   NFFFr   r   r   r]   r   r     s    r   )Nr
   )!r   abcr   r   	functoolsr   r   torch.nn.functionalr   
functionalr   packaging.versionr   V espnet2.enh.layers.complex_utilsr   r   r   $espnet2.enh.loss.criterions.abs_lossr	   __version__is_torch_1_9_plusfinfoget_default_dtypeepsr6   rH   rI   ri   r   r   r   r   r   r   r   r   <module>   s"    
O8=@j?