o
    …wÖiŽ  ã                   @   s‚   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dl	m
Z
mZ d dlmZmZ d dlmZ d dlmZ G dd„ dƒZdS )é    N)ÚAudioToMultiLabelDatasetÚ'TarredAudioToClassificationLabelDataset)ÚFeatureToLabelDatasetÚFeatureToSeqSpeakerLabelDataset)ÚExternalFeatureLoader)ÚWaveformFeaturizerc                   @   sl   e Zd Zg d¢Zg d¢Zejjdd„ ƒZejjdd„ ƒZ	ejjdd„ ƒZ
ejjd	d
„ ƒZejjdd„ ƒZdS )ÚTestASRDatasets)ÚfashÚfbbhÚfclc)Ú0Ú1Ú2Ú3ÚzeroÚoneÚtwoÚthreec           	         óÌ   t j t j ˆ d¡¡}t j t j ˆ d¡¡}tddd d}t||| j|d}t|ƒdks.J ‚d}|D ]}|d	7 }q2|dks?J ‚‡ fd
d„tdƒD ƒ}t||| j|d}d}|D ]}|d	7 }qW|dksdJ ‚d S )Nz)asr/tarred_an4/tarred_audio_manifest.jsonúasr/tarred_an4/audio_{0..1}.taré€>  F©Úsample_rateÚ
int_valuesÚ	augmentor©Úaudio_tar_filepathsÚmanifest_filepathÚlabelsÚ
featurizeré    r   é   c              
      ó*   g | ]}t j t j ˆ d |› d¡¡‘qS ©zasr/tarred_an4/audio_z.tar©ÚosÚpathÚabspathÚjoin©Ú.0Úi©Útest_data_dir© úf/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/tests/collections/asr/test_label_datasets.pyÚ
<listcomp>3   ó   * z7TestASRDatasets.test_tarred_dataset.<locals>.<listcomp>é   ©	r%   r&   r'   r(   r   r   r   ÚlenÚrange©	Úselfr-   Úmanifest_pathÚtarpathr   Úds_braceexpandÚcountÚ_Úds_list_loadr.   r,   r/   Útest_tarred_dataset!   s&   
ÿ

ÿ
z#TestASRDatasets.test_tarred_datasetc           	         r   )Nz3asr/tarred_an4/tarred_duplicate_audio_manifest.jsonr   r   Fr   r   é   r   r!   c              
      r"   r#   r$   r)   r,   r.   r/   r0   P   r1   zFTestASRDatasets.test_tarred_dataset_duplicate_name.<locals>.<listcomp>r2   r3   r6   r.   r,   r/   Ú"test_tarred_dataset_duplicate_name<   s*   ÿ
ÿ

ÿ
z2TestASRDatasets.test_tarred_dataset_duplicate_namec           	      C   s´   t j t j |d¡¡}td d}t|| j|d}t g d¢¡}t d¡}|d d j	dks/J ‚t 
|d d |¡s;J ‚t 
|d d	 |¡sGJ ‚d}|D ]}|d
7 }qK|dksXJ ‚d S )Nzasr/feat/emb.json)r   )r   r   Úfeature_loader)2ç        ç      ð?ç       @rD   rC   rD   rC   rD   rD   rC   rD   rD   g      @rC   rD   rD   rD   rB   rD   rC   rC   rD   rD   rC   rC   rD   rC   rC   rC   rC   rB   rB   rB   rD   rB   rD   rD   rD   rC   rD   rC   rC   rB   rC   rC   rB   rD   rC   rD   rC   é2   r   )rE   r    r2   é   r!   )r%   r&   r'   r(   r   r   Úunique_labels_in_seqÚtorchÚtensorÚshapeÚequal)	r7   r-   r8   rA   r:   Úcorrect_labelÚcorrect_label_lengthr;   r<   r.   r.   r/   Útest_feat_seqlabel_datasetY   s    
ÿÿ

z*TestASRDatasets.test_feat_seqlabel_datasetc              	   C   sT  t  ¡ œ}tj |d¡}t|ddd4}tdƒD ]'}tj |d|› d¡}t t 	dd	¡|¡ |d
ddœ}| 
t |¡d ¡ qW d   ƒ n1 sJw   Y  t|| jd}t | j d¡¡}t d¡}	|d d jdksoJ ‚t |d d |¡s{J ‚t |d d |	¡s‡J ‚d}
|D ]}|
d7 }
q‹|
dks˜J ‚W d   ƒ d S 1 s£w   Y  d S )Númanifest_input.jsonÚwúutf-8©Úencodingr2   Úfeat_z.ptéP   é   i † r   )Úfeature_fileÚdurationÚlabelÚ
)r   r   r!   r   )rU   rV   rF   )ÚtempfileÚTemporaryDirectoryr%   r&   r(   Úopenr5   rH   ÚsaveÚrandnÚwriteÚjsonÚdumpsr   rG   rI   ÚindexrJ   rK   )r7   Útmpdirr8   Úfpr+   Ú	feat_fileÚentryÚdatasetrL   rM   r;   r<   r.   r.   r/   Útest_feat_label_datasetp   s*   
üÿ

"ëz'TestASRDatasets.test_feat_label_datasetc              	   C   sl  t  ¡ ¨}tj |d¡}t|ddd9}tdƒD ],}tj |d|› d¡}tj 	dd	d
¡}t
 ||d¡ |dddœ}| t |¡d ¡ qW d   ƒ n1 sOw   Y  t|dddgd}t g d¢¡}	t d¡}
|d d jt dgd
 ¡jks{J ‚t |d d |	¡s‡J ‚t |d d |
¡s“J ‚d}|D ]}|d	7 }q—|dks¤J ‚W d   ƒ d S 1 s¯w   Y  d S )NrO   rP   rQ   rR   r2   Úaudio_z.wavr   r!   i q r   é
   z0 1 0 1)Úaudio_filepathrX   rY   rZ   r   r   )r   r   r   )r   r!   r   r!   é   gš™™™™™¹?rF   )r[   r\   r%   r&   r(   r]   r5   ÚnpÚrandomÚnormalÚsfr`   ra   rb   r   rH   rI   rJ   rK   )r7   rd   r8   re   r+   Ú
audio_fileÚdatarg   rh   rL   rM   r;   r<   r.   r.   r/   Útest_audio_multilabel_datasetŠ   s,   
ûÿ
$
"êz-TestASRDatasets.test_audio_multilabel_datasetN)Ú__name__Ú
__module__Ú__qualname__r   rG   ÚpytestÚmarkÚunitr>   r@   rN   ri   rt   r.   r.   r.   r/   r      s    



r   )ra   r%   r[   Únumpyrn   rx   Ú	soundfilerq   rH   Ú(nemo.collections.asr.data.audio_to_labelr   r   Ú*nemo.collections.asr.data.feature_to_labelr   r   Ú7nemo.collections.asr.parts.preprocessing.feature_loaderr   Ú1nemo.collections.asr.parts.preprocessing.featuresr   r   r.   r.   r.   r/   Ú<module>   s   