o
    i4                     @   s  d dl Z d dlZd dlmZ d dlZd dlZd dlZd dlmZ	 d dl
mZ d dlmZ d dlmZ e	eje	dkZG dd	 d	eeZee jZG d
d deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZdS )    N)ABC)parse)ComplexTensor)
AbsEnhLoss)Stftz1.9.0c                       sp   e Zd ZdZedefddZedefddZedefddZ	edefd	d
Z
			d fdd	Z  ZS )TimeDomainLossz8Base class for all time-domain Enhancement loss modules.returnc                 C      | j S N)_nameself r   [/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/enh/loss/criterions/time_domain.pyname      zTimeDomainLoss.namec                 C   r	   r
   )_only_for_testr   r   r   r   only_for_test   r   zTimeDomainLoss.only_for_testc                 C   r	   r
   )_is_noise_lossr   r   r   r   is_noise_loss   r   zTimeDomainLoss.is_noise_lossc                 C   r	   r
   )_is_dereverb_lossr   r   r   r   is_dereverb_loss    r   zTimeDomainLoss.is_dereverb_lossFc                    s^   t    || _|| _|| _|r|rtd|r d|vr |d }|r*d|vr*|d }|| _d S )NzF`is_noise_loss` and `is_dereverb_loss` cannot be True at the same timenoise_noisedereverb	_dereverb)super__init__r   r   r   
ValueErrorr   )r   r   r   r   r   	__class__r   r   r   $   s   

zTimeDomainLoss.__init__)FFF)__name__
__module____qualname____doc__propertystrr   boolr   r   r   r   __classcell__r   r   r   r   r      s    r   c                       sH   e Zd ZdZ					d fdd	Zdejdejd	ejfd
dZ  ZS )	CISDRLossa  CI-SDR loss

    Reference:
        Convolutive Transfer Function Invariant SDR Training
        Criteria for Multi-Channel Reverberant Speech Separation;
        C. Boeddeker et al., 2021;
        https://arxiv.org/abs/2011.15003
    Args:
        ref: (Batch, samples)
        inf: (Batch, samples)
        filter_length (int): a time-invariant filter that allows
                                slight distortion via filtering
    Returns:
        loss: (Batch,)
       NFc                    s.   |d u rdn|}t  j||||d || _d S )Nci_sdr_lossr   r   r   )r   r   filter_length)r   r-   r   r   r   r   r   r   r   r   r   Q   s   
zCISDRLoss.__init__refinfr   c                 C   s2   |j |j ksJ |j |j ftjj||d| jdS )NF)compute_permutationr-   )shapeci_sdrptr+   r-   )r   r.   r/   r   r   r   forwardc   s   
zCISDRLoss.forward)r*   NFFF	r!   r"   r#   r$   r   torchTensorr4   r(   r   r   r   r   r)   @   s    r)   c                       sD   e Zd Zeddddf fdd	Zdejdejdejfdd	Z  ZS )
SNRLossNFc                    s2   |d u rdn|}t  j||||d t|| _d S )Nsnr_lossr,   )r   r   floateps)r   r;   r   r   r   r   r   r   r   r   r   q   s   zSNRLoss.__init__r.   r/   r   c              	   C   sR   || }dt t j|dddj| jdt t j|dddj| jd  }| S )N         )pdim)min)r6   log10normclampr;   )r   r.   r/   r   snrr   r   r   r4      s   zSNRLoss.forward)	r!   r"   r#   EPSr   r6   r7   r4   r(   r   r   r   r   r8   p   s    $r8   c                       sP   e Zd ZdZ									d fdd	Zdejd	ejd
ejfddZ  ZS )SDRLossa  SDR loss.

    filter_length: int
        The length of the distortion filter allowed (default: ``512``)
    use_cg_iter:
        If provided, an iterative method is used to solve for the distortion
        filter coefficients instead of direct Gaussian elimination.
        This can speed up the computation of the metrics in case the filters
        are long. Using a value of 10 here has been shown to provide
        good accuracy in most cases and is sufficient when using this
        loss to train neural separation networks.
    clamp_db: float
        clamp the output value in  [-clamp_db, clamp_db]
    zero_mean: bool
        When set to True, the mean of all signals is subtracted prior.
    load_diag:
        If provided, this small value is added to the diagonal coefficients of
        the system metrices when solving for the filter coefficients.
        This can help stabilize the metric in the case where some of the reference
        signals may sometimes be zero
    r*   NTFc
                    sF   |d u rdn|}
t  j|
|||	d || _|| _|| _|| _|| _d S )Nsdr_lossr,   )r   r   r-   use_cg_iterclamp_db	zero_mean	load_diag)r   r-   rI   rJ   rK   rL   r   r   r   r   r   r   r   r   r      s   
zSDRLoss.__init__r.   estr   c              
   C   s(   t j||| j| j| j| j| jdd}|S )a  SDR forward.

        Args:
            ref: Tensor, (..., n_samples)
                reference signal
            est: Tensor (..., n_samples)
                estimated signal

        Returns:
            loss: (...,)
                the SDR loss (negative sdr)
        F)rM   r.   r-   rI   rK   rJ   rL   pairwise)fast_bss_evalrH   r-   rI   rK   rJ   rL   )r   r.   rM   rH   r   r   r   r4      s   zSDRLoss.forward)	r*   NNTNNFFFr5   r   r   r   r   rG      s    $rG   c                       sL   e Zd ZdZ							d fdd	Zdejdejd	ejfd
dZ  ZS )	SISNRLossan  SI-SNR (or named SI-SDR) loss

    A more stable SI-SNR loss with clamp from `fast_bss_eval`.

    Attributes:
        clamp_db: float
            clamp the output value in  [-clamp_db, clamp_db]
        zero_mean: bool
            When set to True, the mean of all signals is subtracted prior.
        eps: float
            Deprecated. Kept for compatibility.
    NTFc           	         sr   |d u rdn|}t  j||||d || _|| _|d ur5td | jd u r7t|d|   d | _d S d S d S )Nsi_snr_lossr,   z7Eps is deprecated in si_snr loss, set clamp_db instead.r>   
   )r   r   rJ   rK   loggingwarningmathrB   )	r   rJ   rK   r;   r   r   r   r   r   r   r   r   r      s   


zSISNRLoss.__init__r.   rM   r   c                 C   s8   t |r
t |sJ |tj||| j| jdd}|S )a  SI-SNR forward.

        Args:

            ref: Tensor, (..., n_samples)
                reference signal
            est: Tensor (..., n_samples)
                estimated signal

        Returns:
            loss: (...,)
                the SI-SDR loss (negative si-sdr)
        F)rM   r.   rK   rJ   rN   )r6   	is_tensorrO   si_sdr_lossrK   rJ   )r   r.   rM   si_snrr   r   r   r4     s   zSISNRLoss.forward)NTNNFFFr5   r   r   r   r   rP      s    $rP   c                       6   e Zd Z				d fdd	ZdejfddZ  ZS )	TimeDomainMSENFc                    (   |d u rdn|}t  j||||d d S )NTD_MSE_lossr,   r   r   r   r   r   r   r   r   r   r   r   r        
zTimeDomainMSE.__init__r   c                 C   sz   |j |j ksJ |j |j f|| d}| dkr%|jddgd}|S | dkr3|jdd}|S td|j |j )zTime-domain MSE loss forward.

        Args:
            ref: (Batch, T) or (Batch, T, C)
            inf: (Batch, T) or (Batch, T, C)
        Returns:
            loss: (Batch,)
        r=      r>   r@   #Invalid input shape: ref={}, inf={})r1   powr@   meanr   format)r   r.   r/   mselossr   r   r   r4   .  s   	zTimeDomainMSE.forwardNFFFr!   r"   r#   r   r6   r7   r4   r(   r   r   r   r   rZ         rZ   c                       rY   )	TimeDomainL1NFc                    r[   )N
TD_L1_lossr,   r]   r^   r   r   r   r   F  r_   zTimeDomainL1.__init__r   c                 C   sx   |j |j ksJ |j |j ft|| }| dkr$|jddgd}|S | dkr2|jdd}|S td|j |j )zTime-domain L1 loss forward.

        Args:
            ref: (Batch, T) or (Batch, T, C)
            inf: (Batch, T) or (Batch, T, C)
        Returns:
            loss: (Batch,)
        r`   r>   r=   ra   rb   )r1   absr@   rd   r   re   )r   r.   r/   l1lossr   r   r   r4   U  s   	zTimeDomainL1.forwardrg   rh   r   r   r   r   rj   E  ri   rj   c                       s`   e Zd ZdZdgdddddf fdd	Zed	efd
dZdd Zde	j
de	j
fddZ  ZS )MultiResL1SpecLossaY  Multi-Resolution L1 time-domain + STFT mag loss

    Reference:
    Lu, Y. J., Cornell, S., Chang, X., Zhang, W., Li, C., Ni, Z., ... & Watanabe, S.
    Towards Low-Distortion Multi-Channel Speech Enhancement:
    The ESPNET-Se Submission to the L3DAS22 Challenge. ICASSP 2022 p. 9201-9205.

    Attributes:
        window_sz: (list)
            list of STFT window sizes.
        hop_sz: (list, optional)
            list of hop_sizes, default is each window_sz // 2.
        eps: (float)
            stability epsilon
        time_domain_weight: (float)
            weight for time domain loss.
    r*   Ng:0yE>g      ?Fc              
      s   |d u rdn|}t t| j||d tdd |D sJ || _|d u r-dd |D | _n|| _|| _|| _tj	
g | _t| j| jD ]\}}	t|||	d dddd}
| j|
 qDd S )	Nrk   )r   c                 S   s   g | ]}|d  dkqS )r=   r   r   .0xr   r   r   
<listcomp>  s    z/MultiResL1SpecLoss.__init__.<locals>.<listcomp>c                 S   s   g | ]}|d  qS )r=   r   ro   r   r   r   rr     s    TF)n_fft
win_length
hop_lengthwindowcenter
normalizedonesided)r   rn   r   all	window_szhop_sztime_domain_weightr;   r6   nn
ModuleListstft_encoderszipr   append)r   r{   r|   r;   r}   r   r   r   whstft_encr   r   r   r     s,   		zMultiResL1SpecLoss.__init__r   c                 C   s   dS )Nzl1_timedomain+magspec_lossr   r   r   r   r   r     s   zMultiResL1SpecLoss.namec                 C   s:   t rt|d |d }| S t|d |d }| S )N).r   ).r>   )is_torch_1_9_plusr6   complexr   rl   )r   stftr   r   r   get_magnitude  s
   z MultiResL1SpecLoss.get_magnitudetargetestimatec           
      C   s   |j |j ksJ |j |j ftj|| dddtj|d ddd| j  }tj|| |  dd}t| jdkr;|S t|}| jD ]%}| ||d }| ||| d }tj||  dd}	||	7 }qC|| j	 d| j	 | t| j  S )	zforward.

        Args:
            target: (Batch, T)
            estimate: (Batch, T)
        Returns:
            loss: (Batch,)
        T)keepdimr=   ra   r   )r>   r=   r>   )
r1   r6   sumr;   rl   lenr   
zeros_liker   r}   )
r   r   r   scaling_factortime_domain_lossspectral_lossr   
target_magestimate_magc_lossr   r   r   r4     s*   


zMultiResL1SpecLoss.forward)r!   r"   r#   r$   r   r%   r&   r   r   r6   r7   r4   r(   r   r   r   r   rn   l  s"    #rn   )rS   rU   abcr   r2   rO   r6   packaging.versionr   Vtorch_complex.tensorr   $espnet2.enh.loss.criterions.abs_lossr   espnet2.layers.stftr   __version__r   r   finfoget_default_dtyper;   rF   r)   r8   rG   rP   rZ   rj   rn   r   r   r   r   <module>   s(    ,0MB''