o
    ui5                     @   s\   d dl Z d dlZd dlZd dlZd dlZd dlZd dl	Z	e	
d dZdZG dd dZdS )    Nignore>  gQ"@c                   @   s8   e Zd ZdZddddZ	dddZdd Zdd ZdS )ComputeScorez3
    ComputeScore class for evaluating DNSMOS.
    cpureturnNc                 C   s>   |dkrt j|dgd| _td| j  dS t || _dS )a<  
        Initialize the ComputeScore object.

        Args:
            primary_model_path (str): Path to the primary model.
            device (str): Device to run the models on ('cpu' or 'cuda').

        Returns:
            None

        Raises:
            RuntimeError: If the device is not supported.
        cudaCUDAExecutionProvider)	providerszUsing CUDA:N)ortInferenceSession	onnx_sessprintget_providers)selfprimary_model_pathdevice r   8/home/ubuntu/sommelier/podcast-pipeline/models/dnsmos.py__init__    s   zComputeScore.__init__x   @     r   Tc                 C   s<   t jj|||d ||d}|rt j|tjdd d }|jS )a  
        Compute the mel spectrogram of an audio signal.

        Args:
            audio (np.ndarray): Input audio signal.
            n_mels (int): Number of mel bands.
            frame_size (int): Size of the FFT window.
            hop_length (int): Number of samples between successive frames.
            sr (int): Sampling rate.
            to_db (bool): Whether to convert the power spectrogram to decibel units.

        Returns:
            np.ndarray: Mel spectrogram.
           )ysrn_fft
hop_lengthn_mels)ref(   )librosafeaturemelspectrogrampower_to_dbnpmaxT)r   audior   
frame_sizer   r   to_dbmel_specr   r   r   audio_melspec6   s   zComputeScore.audio_melspecc                 C   s|   |rt g d}t g d}t g d}nt g d}t g d}t g d}||}||}	||}
||	|
fS )a  
        Apply polynomial fitting to MOS scores.

        Args:
            sig (float): Signal MOS score.
            bak (float): Background MOS score.
            ovr (float): Overall MOS score.
            is_personalized_MOS (bool): Flag for personalized MOS.

        Returns:
            tuple: Tuple containing the adjusted signal, background, and overall MOS scores.
        )g~Z!ugBt?g?gBü)g/g?,?g/z!?gX-*Ͽ)gPzg 7!BV?gojſgDW ?)g;YRg~?gz?)g
w=gc9?g%Pzu?)ggFu?gCyXٿ)r$   poly1d)r   sigbakovris_personalized_MOSp_ovrp_sigp_baksig_polybak_polyovr_polyr   r   r   get_polyfit_valN   s   
zComputeScore.get_polyfit_valc                 C   s  t }t|trtj||d\}}n||krtj|||d}t|}tt| }t||k r9t	
||}t||k s-tt	t|| t d }|}	g }
g }g }g }g }g }t|D ]c}|t||	 t|t |	  }t||k rsqZt	|dt	jddf }d|i}| jd|d d \}}}| ||||\}}}|

| |
| |
| |
| |
| |
| qZd|| ||t	|t	|
t	|t	|t	|t	|d	
}|S )
a  
        Compute DNSMOS scores for an audio signal.

        Args:
            audio (np.ndarray or str): Input audio signal or path to audio file.
            sampling_rate (int): Sampling rate of the input audio.
            is_personalized_MOS (bool): Flag for personalized MOS.

        Returns:
            dict: Dictionary containing MOS scores.

        Raises:
            ValueError: If the input audio is not valid.
        )r   )orig_sr	target_srr   float32Ninput_1r   
audio_clip)
filename
len_in_secr   num_hopsOVRL_rawSIG_rawBAK_rawOVRLSIGBAK)SAMPLING_RATE
isinstancestrr    loadresamplelenintINPUT_LENGTHr$   appendfloorrangearrayastypenewaxisr   runr7   mean)r   r'   sampling_rater0   fs_actual_audio_lenlen_samplesr?   hop_len_samplespredicted_mos_sig_seg_rawpredicted_mos_bak_seg_rawpredicted_mos_ovr_seg_rawpredicted_mos_sig_segpredicted_mos_bak_segpredicted_mos_ovr_segidx	audio_seginput_featuresoimos_sig_rawmos_bak_rawmos_ovr_rawmos_sigmos_bakmos_ovr	clip_dictr   r   r   __call__j   s`   






zComputeScore.__call__)r   )r   N)r   r   r   r   T)__name__
__module____qualname____doc__r   r+   r7   rm   r   r   r   r   r      s    
r   )osr    numpyr$   onnxruntimer
   pandaspdtqdmwarningsfilterwarningsrF   rM   r   r   r   r   r   <module>   s   
