o
    .wiD.                     @   s  d dl Z d dlmZ d dlmZmZ d dlZd dlZd dlm	Z	 d dl
mZmZ d dlmZmZmZ erKerKerKd dlZd dlZd dlZd dlmZ nd\ZZZG d	d
 d
Zdg diZdZdZdZdeddfddZ	d.dedejdee defddZe eZ					d/dej deded ed!ed"e!dej fd#d$Z"d%ej d&e!dej fd'd(Z#			d0d)e	d*ed&e!dee dee d+e!de	fd,d-Z$dS )1    N)	lru_cache)AnyOptional)Tensor)rank_zero_inforank_zero_warn)_LIBROSA_AVAILABLE_ONNXRUNTIME_AVAILABLE_REQUESTS_AVAILABLE)InferenceSession)NNNc                   @   s*   e Zd ZdZdeeef ddfddZdS )r   zDummy InferenceSession.kwargsreturnNc                 K   s   d S N )selfr   r   r   a/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/torchmetrics/functional/audio/dnsmos.py__init__$   s    zInferenceSession.__init__)__name__
__module____qualname____doc__dictstrr   r   r   r   r   r   r   !   s    r   ))deep_noise_suppression_mean_opinion_score_load_session)requestslibrosaonnxruntime>  gQ"@z~/.torchmetrics/DNSMOS
dnsmos_dirr   c                 C   s   d}t j| } dD ]m}t j| |dd }t jt j|dd t j|rFzt|}W q
 tyE } zt 	| W Y d}~nd}~ww | d| }t
d| d	|  t|}t|d
}||j W d   n1 srw   Y  q
dS )zDownload required DNSMOS files.

    Args:
        dnsmos_dir: a dir to save the downloaded files. Defaults to "~/.torchmetrics".

    z@https://raw.githubusercontent.com/microsoft/DNS-Challenge/master)zDNSMOS/DNSMOS/model_v8.onnxzDNSMOS/DNSMOS/sig_bak_ovr.onnxzDNSMOS/pDNSMOS/sig_bak_ovr.onnx   NT)exist_ok/zdownloading z to wb)ospath
expanduserjoinmakedirsdirnameexistsr   	Exceptionremover   r   getopenwritecontent)r   urlfilesaveto_urlfmyfilefr   r   r   _prepare_dnsmos0   s*   

r8   r%   devicenum_threadsc                 C   s   t j| } t j| stt t }|dur||_||_	|j
dkr-t| dg|d}|S dt v rHddg}d|jii g}t| |||d}|S dt v rcddg}d|jii g}t| |||d}|S t| dg|d}|S )	zLoad onnxruntime session.

    Args:
        path: the model path
        device: the device used
        num_threads: the number of threads to use. Defaults to None.

    Returns:
        onnxruntime session

    NcpuCPUExecutionProvider)	providerssess_optionsCUDAExecutionProvider	device_id)r=   provider_optionsr>   CoreMLExecutionProvider)r$   r%   r&   r*   r8   
DNSMOS_DIRortSessionOptionsinter_op_num_threadsintra_op_num_threadstyper   get_available_providersindex)r%   r9   r:   optsinfsr=   rA   r   r   r   r   O   s,   
r   x   @     Taudion_mels
frame_size
hop_lengthsrto_dbc           	      C   s   | j }| d|d } tjj| ||d ||d}|ddd}||dd |j dd  }|rMt|j d D ]}tj|| tj	dd d ||d	f< q8|S )
aV  Calculate the mel-spectrogram of an audio.

    Args:
        audio: [..., T]
        n_mels: the number of mel-frequencies
        frame_size: stft length
        hop_length: stft hop length
        sr: sample rate of audio
        to_db: convert to dB scale if `True` is given

    Returns:
        mel-spectrogram: [..., num_mel, T']

       )yrT   n_fftrS   rQ   r      N)ref(   .)
shapereshaper   featuremelspectrogram	transposerangepower_to_dbnpmax)	rP   rQ   rR   rS   rT   rU   r]   mel_specbr   r   r   _audio_melspec{   s    &rh   mospersonalizedc                 C   s   |rt g d}t g d}t g d}nt g d}t g d}t g d}|| d | d< || d | d< || d	 | d	< | S )
zUse polyfit to convert raw mos values to DNSMOS values.

    Args:
        mos: the raw mos values, [..., 4]
        personalized: whether interfering speaker is penalized

    Returns:
        DNSMOS: [..., 4]

    )g~Z!ugBt?g?gBü)g/g?,?g/z!?gX-*Ͽ)gPzg 7!BV?gojſgDW ?)g;YRg~?gz?)g
w=gc9?g%Pzu?)ggFu?gCyXٿ).rW   ).rZ   ).   )rd   poly1d)ri   rj   p_ovrp_sigp_bakr   r   r   _polyfit_val   s   rp   predsfscache_sessionc                 C   s  t rtrts
td|durt|n| j}|rtnt}|t d|r$dnd d||}|t d||}t	}	||	krHt
j|   ||	d}
n|   }
tt|	 }|
jd	 |k rktj|
|
gd	d
}
|
jd	 |k s[tt|
jd	 |	 t d }g }|	}t|D ]}|
dt|| t|t | f }|jd	 |k rq|j}|d	|d	 f}t|d}tt|dddf dd}|jdkrdt v sdt v rztj||j|j}tj||j|j}W n ty } zt d|  W Y d}~nd}~ww d|i}d|i}tj|!d|d |!d|d gd	dd}t"||}||dd	 d }|#| qt$tj%tj&|d	d
d	d
S )u
  Calculate `Deep Noise Suppression performance evaluation based on Mean Opinion Score`_ (DNSMOS).

    Human subjective evaluation is the ”gold standard” to evaluate speech quality optimized for human perception.
    Perceptual objective metrics serve as a proxy for subjective scores. The conventional and widely used metrics
    require a reference clean speech signal, which is unavailable in real recordings. The no-reference approaches
    correlate poorly with human ratings and are not widely adopted in the research community. One of the biggest
    use cases of these perceptual objective metrics is to evaluate noise suppression algorithms. DNSMOS generalizes
    well in challenging test conditions with a high correlation to human ratings in stack ranking noise suppression
    methods. More details can be found in `DNSMOS paper <https://arxiv.org/abs/2010.15258>`_ and
    `DNSMOS P.835 paper <https://arxiv.org/abs/2110.01763>`_.


    .. hint::
        Using this metric requires you to have ``librosa``, ``onnxruntime`` and ``requests`` installed. Install
        as ``pip install torchmetrics['audio']`` or alternatively ``pip install librosa onnxruntime-gpu requests``
        (if you do not have GPU enabled machine install ``onnxruntime`` instead of ``onnxruntime-gpu``)

    Args:
        preds: [..., time]
        fs: sampling frequency
        personalized: whether interfering speaker is penalized
        device: the device used for calculating DNSMOS, can be cpu or cuda:n, where n is the index of gpu.
            If None is given, then the device of input is used.
        num_threads: the number of threads to use for cpu inference. Defaults to None.
        cache_session: whether to cache the onnx session. By default this is true, meaning that repeated calls to this
            method is faster than if this was set to False, the consequence is that the session will be cached in
            memory until the process is terminated.

    Returns:
        Float tensor with shape ``(...,4)`` of DNSMOS values per sample, i.e. [p808_mos, mos_sig, mos_bak, mos_ovr]

    Raises:
        ModuleNotFoundError:
            If ``librosa``, ``onnxruntime`` or ``requests`` packages are not installed

    Example:
        >>> from torch import randn
        >>> from torchmetrics.functional.audio.dnsmos import deep_noise_suppression_mean_opinion_score
        >>> preds = randn(8000)
        >>> deep_noise_suppression_mean_opinion_score(preds, 8000, False)
        tensor([2.2..., 2.0..., 1.1..., 1.2...], dtype=torch.float64)

    zDNSMOS metric requires that librosa, onnxruntime and requests are installed. Install as `pip install librosa onnxruntime-gpu requests`.Nr"   p zDNSMOS/sig_bak_ovr.onnxz/DNSMOS/model_v8.onnx)orig_sr	target_srrV   )axisrW   .float32i`)rP   r;   r?   rB   z7Failed to use GPU for DNSMOS, reverting to CPU. Error: input_1r   float64)rx   dtype)   )'r   r	   r
   ModuleNotFoundErrortorchr9   _cached_load_sessionr   rC   SAMPLING_RATEr   resampler;   numpyintINPUT_LENGTHr]   rd   concatenatefloorrb   r^   arrayastyperh   rH   rD   rI   OrtValueortvalue_from_numpyrJ   r+   r   runrp   append
from_numpymeanstack)rq   rr   rj   r9   r:   rs   _load_session_function	onnx_sessp808_onnx_sess
desired_fsrP   len_samplesnum_hopsmosshop_len_samplesidx	audio_segr]   input_featuresp808_input_featureseoip808_oimos_npr   r   r   r      s`   3 $""
r   r   )rM   rN   rO   r   T)NNT)%r$   	functoolsr   typingr   r   r   rd   r   r   torchmetrics.utilitiesr   r   torchmetrics.utilities.importsr   r	   r
   r   r   rD   r   r   __doctest_requires__r   r   rC   r   r8   r9   r   r   r   ndarrayboolrh   rp   r   r   r   r   r   <module>   s   
"

)
#