o
    s·¯iÝU  ã                   @   sN   d dl Zd dlZd dlmZ d dlmZ d dlZd dlZG dd„ dejƒZ	dS )é    N)Útensorc                       s¤   e Zd ZdZ					d%‡ fdd„	Zd&d	d
„Zdd„ Zdd„ Zd'dd„Zdd„ Z	dd„ Z
dd„ Zdd„ Zdd„ Zedd„ ƒZdd„ Zdd „ Zd!d"„ Zd#d$„ Z‡  ZS )(ÚSingleSrcPMSQEa™	  Computes the Perceptual Metric for Speech Quality Evaluation (PMSQE)
    as described in [1].
    This version is only designed for 16 kHz (512 length DFT).
    Adaptation to 8 kHz could be done by changing the parameters of the
    class (see Tensorflow implementation).
    The SLL, frequency and gain equalization are applied in each
    sequence independently.

    Parameters:
        window_name (str): Select the used window function for the correct
            factor to be applied. Defaults to sqrt hanning window.
            Among ['rect', 'hann', 'sqrt_hann', 'hamming', 'flatTop'].
        window_weight (float, optional): Correction to the window factor
            applied.
        bark_eq (bool, optional): Whether to apply bark equalization.
        gain_eq (bool, optional): Whether to apply gain equalization.
        sample_rate (int): Sample rate of the input audio.

    References
        [1] J.M.Martin, A.M.Gomez, J.A.Gonzalez, A.M.Peinado 'A Deep Learning
        Loss Function based on the Perceptual Evaluation of the
        Speech Quality', IEEE Signal Processing Letters, 2018.
        Implemented by Juan M. Martin. Contact: mdjuamart@ugr.es

        Copyright 2019: University of Granada, Signal Processing, Multimedia
        Transmission and Speech/Audio Technologies (SigMAT) Group.

    .. note:: Inspired on the Perceptual Evaluation of the Speech Quality (PESQ)
        algorithm, this function consists of two regularization factors :
        the symmetrical and asymmetrical distortion in the loudness domain.

    Examples
        >>> import torch
        >>> from asteroid_filterbanks import STFTFB, Encoder, transforms
        >>> from asteroid.losses import PITLossWrapper, SingleSrcPMSQE
        >>> stft = Encoder(STFTFB(kernel_size=512, n_filters=512, stride=256))
        >>> # Usage by itself
        >>> ref, est = torch.randn(2, 1, 16000), torch.randn(2, 1, 16000)
        >>> ref_spec = transforms.mag(stft(ref))
        >>> est_spec = transforms.mag(stft(est))
        >>> loss_func = SingleSrcPMSQE()
        >>> loss_value = loss_func(est_spec, ref_spec)
        >>> # Usage with PITLossWrapper
        >>> loss_func = PITLossWrapper(SingleSrcPMSQE(), pit_from='pw_pt')
        >>> ref, est = torch.randn(2, 3, 16000), torch.randn(2, 3, 16000)
        >>> ref_spec = transforms.mag(stft(ref))
        >>> est_spec = transforms.mag(stft(est))
        >>> loss_value = loss_func(ref_spec, est_spec)
    Ú	sqrt_hannç      ð?Té€>  c                    sè   t ƒ  ¡  || _|| _|| _|| _|dvrtd |¡ƒ‚|| _|dkr0d| _	d| _
d| _d| _nd| _	d| _
d	| _d
| _d| _d| j | _|  |¡}|| j | _d | _d | _d | _d | _d | _|  | j¡ t t | j¡¡| _d| _d S )N)r   é@  zUnsupported sample rate {}r   gørÎÎxüÜ>g)zàc°âÇ?i   é1   g=c»züü>é   é*   gš™™™™™¹?g-²ï§ÆÓ?g:Œ0âŽyE>)ÚsuperÚ__init__Úwindow_nameÚwindow_weightÚbark_eqÚgain_eqÚ
ValueErrorÚformatÚsample_rateÚSpÚSlÚnbinsÚnbarkÚalphaÚbetaÚget_correction_factorÚpow_correc_factorÚabs_thresh_powerÚmodified_zwicker_powerÚwidth_of_band_barkÚbark_matrixÚmask_sllÚpopulate_constantsÚtorchÚsqrtÚsumÚsqrt_total_widthÚEPS)Úselfr   r   r   r   r   r   ©Ú	__class__© úI/home/ubuntu/.local/lib/python3.10/site-packages/asteroid/losses/pmsqe.pyr   <   s:   


zSingleSrcPMSQE.__init__Nc                 C   sn  |j |j ksJ ‚z|j  | jd d ¡}W n ty(   td | jd d ¡ƒ‚w |dkr9| dd¡}| dd¡}|durJ|dkrG| dd¡n|}ntj|j d |j d d|jd}|  	||¡}|  	||¡}|  
|¡}|  
|¡}| jry|  ||¡}| jr‚|  ||¡}|  ||¡\}	}
|  |d¡}|  |	|
|¡\}}dd	g}| j| | j|  | }tj||d
| |¡ }|S )a:  
        Args
            est_targets (torch.Tensor): Dimensions (B, T, F).
                Padded degraded power spectrum in time-frequency domain.
            targets (torch.Tensor): Dimensions (B, T, F).
                Zero-Padded reference power spectrum in time-frequency domain.
            pad_mask (torch.Tensor, optional):  Dimensions (B, T, 1). Mask
                to indicate the padding frames. Defaults to all ones.

        Dimensions
            B: Number of sequences in the batch.
            T: Number of time frames.
            F: Number of frequency bins.

        Returns
            torch.tensor of shape (B, ), wD + 0.309 * wDA

        ..note:: Dimensions (B, F, T) are also supported by SingleSrcPMSQE but are
            less efficient because input tensors are transposed (not inplace).

        é   é   zNCould not find dimension with {} elements in input tensors, verify your inputsNr   )Údevicer   éÿÿÿÿéþÿÿÿ)Údim)ÚshapeÚindexr   r   r   Ú	transposer"   Úonesr.   Úmagnitude_at_sllÚbark_computationr   Úbark_freq_equalizationr   Úbark_gain_equalizationÚcompute_distortion_tensorsÚcompute_audible_powerÚper_frame_distortionr   r   r$   )r'   Úest_targetsÚtargetsÚpad_maskÚfreq_idxÚref_spectraÚdeg_spectraÚref_bark_spectraÚdeg_bark_spectraÚsym_dÚasym_dÚaudible_power_refÚwd_frameÚ	wda_frameÚdimsÚpmsqe_frameÚpmsqer*   r*   r+   Úforwardg   s@   ýÿÿ

zSingleSrcPMSQE.forwardc                 C   sR   || | j  }tj|ddd}tj|ddd}tj|ddd}|| }d| | S )Nr/   T©r1   Úkeepdimr0   ç    ÐcA)r    r"   Úmeanr$   )r'   Úspectrar?   Úmasked_spectraÚfreq_mean_masked_spectraÚsum_spectraÚseq_lenÚmean_powr*   r*   r+   r6   ­   s   zSingleSrcPMSQE.magnitude_at_sllc                 C   s   | j t || j¡ S ©N)r   r"   Úmatmulr   )r'   rR   r*   r*   r+   r7   ¹   s   zSingleSrcPMSQE.bark_computationc                 C   s.   t  || j| k|t  |¡¡}t j|dddS )Nr/   TrN   )r"   Úwherer   Ú
zeros_liker$   )r'   Úbark_spectraÚfactorÚthr_barkr*   r*   r+   r;   ¼   s   ýz$SingleSrcPMSQE.compute_audible_powerc                 C   s\   |   |d¡}|   |d¡}|d |d  }t |dt |¡ ¡}t |dt |¡ ¡}|| S )Nr   g     ˆ³@g      @ga2U0*©3?)r;   r"   ÚminÚ	ones_likeÚmax)r'   rC   rD   rG   Úaudible_power_degÚgainÚlimited_gainr*   r*   r+   r9   Æ   s   z%SingleSrcPMSQE.bark_gain_equalizationc                 C   sÎ   |   |d¡}|dk}|| jd k}t ||t |¡¡}t ||t |¡¡}tjt ||t |¡¡ddd}tjt ||t |¡¡ddd}	|d |	d  }
t |
dt |
¡ ¡}
t |
dt |
¡ ¡}
|
| S )z1This version is applied in the degraded directly.g      Y@rP   r0   TrN   g     @@g{®Gáz„?)	r;   r   r"   rZ   r[   r$   r_   r`   ra   )r'   rC   rD   Úaudible_power_x100Ú
not_silentÚcond_thrÚref_thresholdedÚdeg_thresholdedÚavg_ppb_refÚavg_ppb_degÚ	equalizerr*   r*   r+   r8   Ò   s.   ÿÿýýz%SingleSrcPMSQE.bark_freq_equalizationc                 C   s`   t  | jd | j¡}t  dd| | j  | j¡d }| j| | }|| jk }t  |t  |¡|¡S )Nç      à?r   )r"   Úpowr   r   r   rZ   r[   )r'   r\   ÚatermÚbtermÚloudness_densÚcondr*   r*   r+   Úloudness_computationñ   s   ÿÿ
z#SingleSrcPMSQE.loudness_computationc                 C   s®   |   |¡}|   |¡}t || ¡}dt ||¡ }t || t |¡| j ¡}t |d |d  d¡}|dt |¡ k }	t |	t 	|¡t |dt |¡ ¡¡}
|
| }||fS )Ng      Ð?g      I@g333333ó?g      @g      (@)
rs   r"   Úabsr_   ra   r`   r&   rn   rZ   r[   )r'   Úref_bark_specÚdeg_bark_specÚoriginal_loudnessÚdistorted_loudnessÚrÚmrE   Úasymrr   Úasym_factorrF   r*   r*   r+   r:   ü   s   

ÿz)SingleSrcPMSQE.compute_distortion_tensorsc           	      C   sš   t jt  || j d¡| j ddd}t  |¡| j }t j|| j ddd}t  |d d d¡}t  || dt  |¡ ¡}t  || dt  |¡ ¡}||fS )	Nç       @r/   TrN   g     jø@rP   g{®Gáz¤?g     €F@)	r"   r$   rn   r   r&   r#   r%   r_   r`   )	r'   rE   rF   Útotal_power_refÚd_frameÚda_frameÚweightsrH   rI   r*   r*   r+   r<     s   ÿz#SingleSrcPMSQE.per_frame_distortionc                 C   sJ   | dkrdS | dkrdS | dkrdS | dkrdS | d	krd
S t d | ¡ƒ‚)z<Returns the power correction factor depending on the window.Úrectr   ÚhanngVUUUU@r   r}   Úhammingg´Iá·€!@ÚflatTopg^o©Ô@zUnexpected window type {})r   r   )r   r*   r*   r+   r   $  s   z$SingleSrcPMSQE.get_correction_factorc                 C   s”   |dkr	|   ¡  n|dkr|  ¡  tj| jd d gtjd}d|d< d|d	d
…< d|d
< | j| jd  | jd  }|| }tjt	|ƒdd| _
d S )Nr   r   r,   r-   )r2   Údtypegš™™™™™Ù?é   r   é   éh   rm   r}   F©Úrequires_grad)Úregister_8k_constantsÚregister_16k_constantsÚnpÚzerosr   Úfloat32r   ÚnnÚ	Parameterr   r    )r'   r   r    Ú
correctionr*   r*   r+   r!   4  s   
z!SingleSrcPMSQE.populate_constantsc                 C   óž   g d¢}t jt|ƒdd| _g d¢}t jt|ƒdd| _g d¢}t jt|ƒdd| _t t¡j	 
¡ }tj |d¡}|  |¡d  d¡}t jt|ƒdd| _d S )	N)1g   @„tˆAç   ÀZºBAç   €©Hñ@çzTüßÉ!³@çÓÁú?—[’@çBÎûÿ¸Px@ç¨ŒŸ-Z@ç`ºÚF@çêxÌ@eÈ1@ç6±ÀWt‹#@çÕæÿU—@çK­÷í¸@çõÚl¬Ä|þ?çjÞqŠŽ$ô?çën‡†Eï?çqh”.ç?çàe†²þá?ç8Ùî@Ý?çJëÿæØ?çFì@1Õ?ça8×0CãÒ?çÌ²'Í9Ñ?çÁâpæWsÐ?ç>[{Ð?r«   r«   r«   çnÂ½2oÕÐ?çÕ^DÛ1uÒ?ç‚çÞÃ%ÇÓ?çaˆœ¾ž¯Õ?çep”¼:Ç×?çHÁSÈ•zÙ?ç.‘ÎàïÛ?ç¬sÈ^ïÝ?ç ÑŠXß?çrÅÅQ¹	à?rµ   çxšÌx[ià?ç¨Ç¶8Ëà?r·   r·   r¶   gR›8¹ß¡Þ?gé+H3MÛ?r°   gäiù«<×?gÖmPû­Ú?g ùœ»]/á?FrŠ   )1ç7ì8|6UÐ?r¸   r¸   r¸   ç§°£M§Ð?ç"·4ò¥ÀÏ?çŽ°;˜‰TÏ?ç ä©TEñÎ?çsá¿…•Î?çÒ¿M;@Î?ç‹£°£ðÍ?çÌ/íÊ¥Í?çq=
×£pÍ?rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   )1ç»d#Ù#Ä?çÀ”ZÔ?ç(îx“ß¢Ô?çgÔ|•|ìÔ?çÿZ^¹Þ6Õ?çñÿ‚Õ?ç˜6ÊúÍÕ?çÅ9êè¸Ö?ç§Ê÷ŒDhÖ?çl´è¡¶Ö?çå›ÈÌ×?ç@Â0`ÉU×?çª¶›à›¦×?çRÔ™{Hø×?ç	3mÿÊJØ?çþºÓ'žØ?ç\T‹ˆbòØ?çSçQñGÙ?ç‡£«twÙ?çAÒ§UôÙ?çÙYLÚ?ç—:ÈëÁ¤Ú?çá}U.TþÚ?çKs+„ÕXÛ?ç©2Œ»A´Û?ç'¤5Ü?çó¯å•ëmÜ?çVœj-ÌÜ?ç¢~¶f+Ý?ç³)Wx—‹Ý?çš'×ÈìÝ?ç*ÙYôNÞ?çcc^G²Þ?çÍYŸrLß?ç£ x|{ß?ç€^»´áß?ç¨Ã
·|$à?çe¡Xà?çZd;ßOà?çQ1Îß„Âà?çRÔ™{Høà?çìˆC6.á?g§¥heá?g?tA}Ëœá?gøª•	¿Ôá?gÒ«JCâ?gŸŽÇTFâ?g     €â?gTS’u8ºâ?zbark_matrix_16k.matÚBark_matrix_16kr   ©r‘   r’   r   r   r   r   ÚpathlibÚPathÚ__file__ÚparentÚabsoluteÚosÚpathÚjoinÚload_matÚastyper   ©r'   r   Úmodif_zwicker_powerr   Ú
local_pathÚ	bark_pathr   r*   r*   r+   r   B  s   333z%SingleSrcPMSQE.register_16k_constantsc                 C   r”   )	N)*iˆr•   r–   r—   r˜   r™   rš   r›   rœ   r   rž   rŸ   r    r¡   r¢   r£   r¤   r¥   r¦   r§   r¨   r©   rª   r«   r«   r«   r«   r¬   r­   r®   r¯   r°   r±   r²   r³   r´   rµ   rµ   r¶   r·   r·   r·   FrŠ   )*r¸   r¸   r¸   r¸   r¹   rº   r»   r¼   r½   r¾   r¿   rÀ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   rÁ   )*rÂ   rÃ   rÄ   rÅ   rÆ   rÇ   rÈ   rÉ   rÊ   rË   rÌ   rÍ   rÎ   rÏ   rÐ   rÑ   rÒ   rÓ   rÔ   rÕ   rÖ   r×   rØ   rÙ   rÚ   rÛ   rÜ   rÝ   rÞ   rß   rà   rá   râ   rã   rä   rå   ræ   rç   rè   ré   rê   rë   zbark_matrix_8k.matÚBark_matrix_8kr   rí   rø   r*   r*   r+   rŒ   è  s   ,,,z$SingleSrcPMSQE.register_8k_constantsc                 O   s   ddl m} ||i |¤ŽS )Nr   )Úloadmat)Úscipy.iorý   )r'   ÚargsÚkwargsrý   r*   r*   r+   rö   y  s   zSingleSrcPMSQE.load_mat)r   r   TTr   rX   )r   )Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   rM   r6   r7   r;   r9   r8   rs   r:   r<   Ústaticmethodr   r!   r   rŒ   rö   Ú__classcell__r*   r*   r(   r+   r   	   s2    4ú
+F


 ' r   )
ÚnumpyrŽ   r"   r   Útorch.nnr‘   rî   ró   ÚModuler   r*   r*   r*   r+   Ú<module>   s    