o
    }oi                     @   sj   d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZmZ dd ZG dd dZdS )	    N)AudioToSpeechE2ESpkDiarDataset)WaveformFeaturizer)get_vad_out_from_rttm_lineread_rttm_linesc                 C   s8   t | }d}|D ]}t|\}}t||| }q||kS )af  
    Check if the maximum RTTM duration exceeds the length of the provided audio file.

    Args:
        rttm_file_path (str): Path to the RTTM file.
        wav_len_in_sec (float): Length of the audio file in seconds.

    Returns:
        bool: True if the maximum RTTM duration is less than or equal to the length of the audio file, False otherwise.
    r   )r   r   max)rttm_file_pathwav_len_in_sec
rttm_linesmax_rttm_seclinestartdur r   f/home/ubuntu/.local/lib/python3.10/site-packages/tests/collections/speaker_tasks/test_diar_datasets.pyis_rttm_length_too_long   s   r   c                   @   s   e Zd Zejjdd ZdS )"TestAudioToSpeechE2ESpkDiarDatasetc                 C   sT  t jt j|d}d}d}tj rdnd}g }tjddd}t	|d	dd
5}t
|D ](\}	}
|	|kr8 n|
d|d dd}
||
 d t|
}|| q.W d    n1 saw   Y  |d tddd d}t|jddd|ddd|d	}tjjj|||jddddd}t||| ksJ t|}d}t
|D ]s\}}||d krt||ksJ d|\}}}}t|jd D ]#}||  }t||| |  d |j | }||| ksJ dqt| rJ dt| rJ dt| r
J dt| rJ dqW d    d S 1 s#w   Y  d S ) Nzasr/diarizer/lsm_val.json      cudacpuwzutf-8)modeencodingr)r   ztests/data//
 r   i>  F)sample_rate
int_values	augmentorg      ?Z   g{Gz?)	manifest_filepathsoft_label_thressession_len_secnum_spks
featurizerwindow_strideglobal_ranksoft_targetsdevice   )dataset
batch_size
collate_fn	drop_lastshufflenum_workers
pin_memoryz,Batch size does not match the expected valuedurationzDuration deviation exceeds 1%z(audio_signals tensor contains NaN valuesz+audio_signal_len tensor contains NaN valuesz"targets tensor contains NaN valuesz&target_lens tensor contains NaN values) ospathabspathjointorchr   is_availabletempfileNamedTemporaryFileopen	enumeratereplacewritejsonloadsappendseekr   r   nameutilsdata
DataLoadereesd_train_collate_fnlenrangeshapeitemabsr   isnanany)selftest_data_dirmanifest_pathr,   num_samplesr)   data_dict_listfmfileixr   	data_dictr%   r+   dataloader_instancebatch_countsdeviation_thres_ratebatch_indexbatchaudio_signalsaudio_signal_lentargetstarget_lenssample_indexdataloader_audio_in_secdata_dur_in_secr   r   r   test_e2e_speaker_diar_dataset0   sz   


	$z@TestAudioToSpeechE2ESpkDiarDataset.test_e2e_speaker_diar_datasetN)__name__
__module____qualname__pytestmarkunitrd   r   r   r   r   r   .   s    r   )r?   r3   r9   rh   
torch.cudar7   -nemo.collections.asr.data.audio_to_diar_labelr   1nemo.collections.asr.parts.preprocessing.featuresr   .nemo.collections.asr.parts.utils.speaker_utilsr   r   r   r   r   r   r   r   <module>   s   