o
    Si                     @   s  d dl Z d dlZd dlmZ d dlmZmZmZmZ d dl	Z
d dlmZ d dlmZmZmZmZ d dlmZmZmZ G dd dZ			dd
edee defddZ			ddeeef d
edee deeddf fddZ			dded
edee fddZ			dded
edee fddZdS )    N)ThreadPoolExecutor)	GeneratorListOptionalUnion)tqdm)CutSetMonoCutRecordingSetSupervisionSegment)fastcopyis_module_availableresumable_downloadc                   @   s2   e Zd ZdddZ		dd
dZdd Zdd ZdS )ComputeScorereturnNc                 C   s$   dd l }||| _d| _d| _d S )Nr   >  gQ"@)onnxruntimeInferenceSession	onnx_sessSAMPLING_RATEINPUT_LENGTH)selfprimary_model_pathort r   K/home/ubuntu/.local/lib/python3.10/site-packages/lhotse/workflows/dnsmos.py__init__   s   
zComputeScore.__init__x   @     r   Tc           	      C   sD   dd l }|jj|||d ||d}|r|j|tjdd d }|jS )Nr      )ysrn_fft
hop_lengthn_mels)ref(   )librosafeaturemelspectrogrampower_to_dbnpmaxT)	r   audior%   
frame_sizer$   r"   to_dbr(   mel_specr   r   r   audio_melspec   s   zComputeScore.audio_melspecc                 C   s|   |rt g d}t g d}t g d}nt g d}t g d}t g d}||}||}	||}
||	|
fS )N)g~Z!ugBt?g?gBü)g/g?,?g/z!?gX-*Ͽ)gPzg 7!BV?gojſgDW ?)g;YRg~?gz?)g
w=gc9?g%Pzu?)ggFu?gCyXٿ)r,   poly1d)r   sigbakovris_personalized_mosp_ovrp_sigp_baksig_polybak_polyovr_polyr   r   r   get_polyfit_val!   s   
zComputeScore.get_polyfit_valc                 C   sL  | j }|| }t| j| }t||k r#t||}t||k sttt|| | j d }|}g }g }	g }
t	|D ]U}|t|| t|| j |  }t||k rYq?t
|dtjd d f }d|i}| jd |d d \}}}| ||||\}}}|| |	| |
| q?|t|
t|t|	dfS )Nr    float32input_1r   )OVRLSIGBAK)r   resample
load_audiointr   lenr,   appendfloorrangearrayastypenewaxisr   runr?   mean)r   manifestr8   fsr/   len_samplesnum_hopshop_len_samplespredicted_mos_sig_segpredicted_mos_bak_segpredicted_mos_ovr_segidx	audio_seginput_featuresoimos_sig_rawmos_bak_rawmos_ovr_rawmos_sigmos_bakmos_ovrr   r   r   __call__1   sB    


zComputeScore.__call__)r   N)r   r   r   r   T)__name__
__module____qualname__r   r3   r?   rc   r   r   r   r   r      s    

r   Fr8   download_rootr   c                 C   s:   |d ur|nd}| rdnd}t j|d}t||d |S )Nz/tmpz`https://github.com/microsoft/DNS-Challenge/raw/refs/heads/master/DNSMOS/pDNSMOS/sig_bak_ovr.onnxz_https://github.com/microsoft/DNS-Challenge/raw/refs/heads/master/DNSMOS/DNSMOS/sig_bak_ovr.onnxzsig_bak_ovr.onnx)filename)ospathjoinr   )r8   rg   urlrh   r   r   r   download_modelX   s   rm   rQ   c                 c   sj    t ds	J dt dsJ dt| tr!t| ||E dH  dS t| tr1t| ||E dH  dS td)a  
    Use Microsoft DNSMOS P.835 prediction model to annotate either RECORDINGS_MANIFEST, RECORDINGS_DIR, or CUTS_MANIFEST.
    It will predict DNSMOS P.835 score including SIG, NAK, and OVRL.

    See the original repo for more details: https://github.com/microsoft/DNS-Challenge/tree/master/DNSMOS

    :param manifest: a ``RecordingSet`` or ``CutSet`` object.
    :param is_personalized_mos: flag to indicate if personalized MOS score is needed or regular.
    :param download_root: if specified, the model will be downloaded to this directory. Otherwise,
        it will be downloaded to /tmp.
    :return: a generator of cuts (use ``CutSet.open_writer()`` to write them).
    r(   z[This function expects librosa to be installed. You can install it via 'pip install librosa'r   zcThis function expects onnxruntime to be installed. You can install it via 'pip install onnxruntime'Nz;The ``manifest`` must be either a RecordingSet or a CutSet.)r   
isinstancer
   _annotate_recordingsr   _annotate_cuts
ValueError)rQ   r8   rg   r   r   r   annotate_dnsmosg   s(   



rr   
recordingsc                 c   s    t ||}t|}t ]}g }t| ddD ] }|jdkr,td|j d|j d q||	||| qt|ddD ]$}|
 \}}	t|j|jd|jd	}
t|jd|jd||
g|	d
}|V  q=W d   dS 1 smw   Y  dS )z[
    Helper function that annotates a RecordingSet with DNSMOS P.835 prediction model.
    Distributing tasksdescr    zSkipping recording '
'. It has 4 channels, but we currently only support mono input.
Processingr   )idrecording_idstartduration)rz   r|   r}   channel	recordingsupervisionscustomN)rm   r   r   r   num_channelsloggingwarningrz   rI   submitresultr   r}   r	   )rs   r8   rg   r   compute_scoreexfuturesr   futurer   supervisioncutr   r   r   ro      s@   

	"ro   cutsc           
   	   c   s    t ||}t|}t S}g }t| ddD ] }|jdkr,td|j d|j d q||	||| qt|ddD ]}|
 \}}	|jdurQ|j|	 n|	|_|V  q=W d   dS 1 scw   Y  dS )	zU
    Helper function that annotates a CutSet with DNSMOS P.835 prediction model.
    rt   ru   r    zSkipping cut 'rw   rx   ry   N)rm   r   r   r   r   r   r   rz   rI   r   r   r   update)
r   r8   rg   r   r   r   r   r   r   r   r   r   r   rp      s(   


"rp   )FN)r   ri   concurrent.futures.threadr   typingr   r   r   r   numpyr,   r   lhotser   r	   r
   r   lhotse.utilsr   r   r   r   boolstrrm   rr   ro   rp   r   r   r   r   <module>   s^    L


-
,