o
    }oio                  	   @   sl  d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlZ	d dl
Z
d dlZd dlZd dlmZmZ d dlmZ d dlmZ d dlmZmZmZmZ d dlmZmZmZmZ d dl m!Z! d d	l"m#Z#m$Z$ d d
l%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z, d dl-m.Z. zeeZ/W n e0e1fy   dZ/Y nw dd Z2de*j3fddZ4G dd dZ5G dd dZ6dS )    N)mock)
DictConfig	OmegaConf)
DataLoader)audio_to_text_dataset)DataStoreObjectTarredAudioToBPEDatasetTarredAudioToCharDatasetcache_datastore_manifests)__DALI_MINIMUM_VERSION__AudioToBPEDALIDatasetAudioToCharDALIDatasetis_dali_supported))inject_dataloader_value_from_model_config)FeatureToBPEDatasetFeatureToCharDataset)EncDecCTCModel)write_manifest)
tokenizers)!get_lhotse_dataloader_from_config)loggingFc                 C   sF   g }|    } | D ]}|| }|| q
|d | }d|}|S )N )cpunumpyappendjoin)tokenstoken_lengthmappingtextidx
text_token r"   [/home/ubuntu/.local/lib/python3.10/site-packages/tests/collections/asr/test_asr_datasets.pydecode_chars8   s   
r$   	tokenizerc                 C   s&   |    } | d | } || }|S )N)r   r   ids_to_text)r   r   r%   r   r"   r"   r#   decode_subwordsD   s   
r'   c                   @   sH  e Zd Zg dZejjdd Zejjdd Zejjdd Z	ej
 ejjdd	 Zejje d
dejjdd Zejje d
dejjdd Zejje d
dejjdd Zejje d
dejjdd Zejje d
dejjdd Zejjdd Zejjdd Zejjdd Zejjdd ZdS )TestASRDatasets) abcdefghijklmnopqrstuvwxyz'c                    s   t jt j d}t jt j d}t||| jdd}t|dks'J d}|D ]}|d7 }q+|dks8J  fdd	td
D }t||| jdd}d}|D ]}|d7 }qP|dks]J d S )N)asr/tarred_an4/tarred_audio_manifest.jsonasr/tarred_an4/audio_{0..1}.tar>  audio_tar_filepathsmanifest_filepathlabelssample_rate    r      c              
      *   g | ]}t jt j d | dqS zasr/tarred_an4/audio_z.tarospathabspathr   .0r2   test_data_dirr"   r#   
<listcomp>{      * z7TestASRDatasets.test_tarred_dataset.<locals>.<listcomp>   )rR   rS   rT   r   r	   rK   lenrange)selfrX   manifest_pathtarpathds_braceexpandcount_ds_list_loadr"   rW   r#   test_tarred_datasetk   s$   



z#TestASRDatasets.test_tarred_datasetc                 C   st   t jt j|d}t jt j|d}t||| jdd}t|dks'J d}|D ]}|d7 }q+|dks8J d	S )
z
        Checks for
            1. file count when manifest len is less than tarred dataset
            2. Ignoring files in manifest that are not in tarred balls

        z3asr/tarred_an4/tarred_duplicate_audio_manifest.jsonrF   rG   rH      r   rN      N)rR   rS   rT   r   r	   rK   r\   )r^   rX   r_   r`   ra   rb   rc   r"   r"   r#   test_tarred_dataset_filter   s   

z*TestASRDatasets.test_tarred_dataset_filterc                 C   sn   dt j_|t j tttg dd}ttt	| j
d}t||dd d|jv s1J dt j_d S )NT)r*   r+   r,   rK   rK   )keyzz`labels` is explicitly provided to the data loader, and is different from the `labels` provided at the model level config.F)r   _logger	propagate	set_levelWARNINGr   createdictcopydeepcopyrK   r   r   )r^   caplog	model_cfgdataloader_cfgr"   r"   r#   (test_mismatch_in_model_dataloader_config   s   z8TestASRDatasets.test_mismatch_in_model_dataloader_configc           
         s   t jt j d}t j dddd}tjd|d}t jt j d}t|||d	d
}t|dks7J d}|D ]}|d7 }q;|dksHJ  fddtdD }t|||d	d
}	d}|	D ]}|d7 }q_|dkslJ d S )NrE   asrr   an4_wpe_128	vocab.txtbert-base-casedpretrained_model_name
vocab_filerF   rG   )rI   rJ   r%   rL   rM   r   rN   c              
      rO   rP   rQ   rU   rW   r"   r#   rY      rZ   z;TestASRDatasets.test_tarred_bpe_dataset.<locals>.<listcomp>r[   )	rR   rS   rT   r   r   AutoTokenizerr   r\   r]   )
r^   rX   r_   tokenizer_pathr%   r`   ra   rb   rc   rd   r"   rW   r#   test_tarred_bpe_dataset   s(   

z'TestASRDatasets.test_tarred_bpe_datasetz4NVIDIA DALI is not installed or incompatible version)reasonc              
      s  t jt j|d}d}d}tj rdnd}g }tjddd%}t	|d	dd
5}t
|D ](\}	}
|	|kr9 n|
dddd}
||
 d t|
}||d  q/W d    n1 sbw   Y  |d t|j|| jdddd}t||| ksJ d}g }|D ]!}|d }|d } fddt||D }|| |t|7 }q||ksJ t||D ]
\}}||ksJ q|d t|j|| jdddd}t||| ksJ d}g }|D ]!}|d }|d } fddt||D }|| |t|7 }q||ksJ d}t||D ]\}}||kr |d7 }q|dks)J tt|t|D ]\}}||ks=J q2W d    d S 1 sKw   Y  d S )Nasr/an4_val.json
   r[   gpur   r@   utf-8modeencodingr;   r   tests/data/tests/.data/
r   r   r         0@enF)rJ   device
batch_sizerK   max_durationparsershuffle   c                        g | ]\}}t || jd qS )r   r$   rK   rV   
transcripttranscripts_lengthr^   r"   r#   rY          z:TestASRDatasets.test_dali_char_dataset.<locals>.<listcomp>Tc                    r   r   r   r   r   r"   r#   rY     r   rN   )rR   rS   rT   r   torchcudais_availabletempfileNamedTemporaryFileopen	enumeratereplacewritejsonloadsr   seekr   namerK   r\   zipextendsorted)r^   rX   r_   num_samplesr   r   textsr/   r6   ixlinedatadatasetrb   original_transcriptsbatchtranscriptstranscripts_lengthsr   og_transcriptshuffled_transcriptssamples_changedorigshuffledshuffled_transcriptr"   r   r#   test_dali_char_dataset   s   









$z&TestASRDatasets.test_dali_char_datasetc              	      s  t jt j|d}d}d}tj rdnd}g }t j|dddd	}tjd
|d t	j
ddd!}t|ddd5}	t|	D ](\}
}|
|krJ n|dddd}|| d t|}||d  q@W d    n1 ssw   Y  |d t|j ||ddd}t||| ksJ d}g }|D ]!}|d }|d } fddt||D }|| |t|7 }q||ksJ t||D ]
\}}||ksJ q|d t|j ||ddd}t||| ksJ d}g }|D ]!}|d }|d } fddt||D }|| |t|7 }q||ksJ d}t||D ]\}}||kr-|d7 }q |dks6J tt|t|D ]\}}||ksJJ q?W d    d S 1 sXw   Y  d S ) Nr   r   r[   r   r   rw   r   rx   ry   rz   r{   r@   r   r   r;   r   r   r   r   r   r   r   r   F)rJ   r%   r   r   r   r   r   c                       g | ]\}}t || d qS r%   r'   r   r   r"   r#   rY   K      z9TestASRDatasets.test_dali_bpe_dataset.<locals>.<listcomp>Tc                    r   r   r   r   r   r"   r#   rY   i  r   rN   )rR   rS   rT   r   r   r   r   r   r~   r   r   r   r   r   r   r   r   r   r   r   r   r\   r   r   r   )r^   rX   r_   r   r   r   r   r   r/   r6   r   r   r   r   rb   r   r   r   r   r   r   r   r   r   r   r   r"   r   r#   test_dali_bpe_dataset!  s   

	


	


$z%TestASRDatasets.test_dali_bpe_datasetc              
   C   s  t jt j|d}d}d}g }tjddd}t|d5}t|D ](\}}	||kr- n|	dd	d
d}	|	|	 d
 t
|	}
||
d  q#W d    n1 sVw   Y  |d ddd}t|}|jd| j|ddddd}tj|dddd|d}tj|d}t|||jddddd}t|}t||D ]S\}}|\}}}}|||d\}}|\}}}}|  d d d d d |f }|  d d d d d |f }t|| }t|dk sJ t|dk sJ qW d    d S 1 sw   Y  d S )Nr   r   rN   r@   r   r   r;   r   r   r   r   r   r   >nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor        _target_ditherrG   F333330@)rJ   rL   rK   r   trim_silencer   r   	is_tarredconfigr   	device_idglobal_rank
world_sizepreprocessor_cfg)r   r   r   
collate_fn	drop_lastr   num_workers
pin_memoryinput_signallength-C6?{Gz?)rR   rS   rT   r   r   r   r   r   r   r   r   r   r   r   r   r   rK   r   get_dali_char_datasetget_char_datasetr   r   r   from_config_dictr   r   r   npabsmeanmax)r^   rX   r_   r   r   r   r/   r6   r   r   r   preprocessorr   dataset_cfgdali_datasetref_datasetref_dataloaderref_preprocessorref_data	dali_data	ref_audioref_audio_lenrc   ref_featuresref_features_lendali_featuresdali_features_lenr*   r+   errr"   r"   r#   test_dali_char_vs_ref_datasetz  sz   



	"""z-TestASRDatasets.test_dali_char_vs_ref_datasetc                    sh  t jt jd}fddtdD }fddtdD }d}tj r*dnd}g }tj	d	d
d}d}	t
|d}
t|
 }	W d    n1 sNw   Y  t||||| jdddd	}t||	| kskJ d}g }|D ]!}|d }|d } fddt||D }|| |t|7 }qq||	ksJ t||D ]
\}}||ksJ qt||||| jdddd	}t||	| ksJ d}g }|D ]!}|d }|d } fddt||D }|| |t|7 }q||	ksJ d}t||D ]\}}||kr|d7 }q|dksJ tt|t|D ]\}}||ksJ qW d    d S 1 s-w   Y  d S )NrE   c              
      rO   rP   rQ   rV   r    rW   r"   r#   rY         zATestASRDatasets.test_tarred_dali_char_dataset.<locals>.<listcomp>r[   c              
      rO   z asr/tarred_an4/dali_index/audio_z.indexrQ   r   rW   r"   r#   rY            r   r   r@   r   r   r   r;   r   r   F)	rJ   rI   audio_tar_index_filepathsr   r   rK   r   r   r   r   c                    r   r   r   r   r   r"   r#   rY     r   Tc                    r   r   r   r   r   r"   r#   rY   	  r   rN   )rR   rS   rT   r   r]   r   r   r   r   r   r   r\   	readlinesr   rK   r   r   r   )r^   rX   r_   rI   r   r   r   r   r/   r   r6   r   rb   r   r   r   r   r   r   r   r   r   r   r   r"   )r^   rX   r#   test_tarred_dali_char_dataset  s   






$z-TestASRDatasets.test_tarred_dali_char_datasetc                     s  t jt j d} fddtdD } fddtdD }d}g }tjddd	}d
}t|d}	t|	D ]\}
}t	
|}||d  |
}q9W d    n1 sVw   Y  ddd}t|}|j||d| j|ddddd
}tj|dd
d
d|d}tj|d
d
dd}t|||jddd
dd}t|}t||D ]S\}}|\}}}}|||d\}}|\}}}}|  d d d d d |f }|  d d d d d |f }t|| }t|dk sJ t|dk sJ qW d    d S 1 sw   Y  d S )NrE   c              
      rO   rP   rQ   r   rW   r"   r#   rY     r   zHTestASRDatasets.test_dali_tarred_char_vs_ref_dataset.<locals>.<listcomp>r[   c              
      rO   r   rQ   r   rW   r"   r#   rY   !  r   r   r@   r   r   r   r;   r   r   r   r   rG   Fr   )
rJ   tarred_audio_filepathstarred_audio_index_filepathsrL   rK   r   r   r   r   r   rN   r   )r   	shuffle_nr   r   r   r   r   r   )rR   rS   rT   r   r]   r   r   r   r   r   r   r   r   r   rK   r   r   get_tarred_datasetr   r   r   r   r   r   r   r   r   r   r   ) r^   rX   r_   rI   r   r   r   r/   r   r6   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rc   r   r   r   r   r*   r+   r   r"   rW   r#   $test_dali_tarred_char_vs_ref_dataset  s   



	"""z4TestASRDatasets.test_dali_tarred_char_vs_ref_datasetc              	   C   s0  d}d}t  }tj|d}t|ddd5}t|D ](}tj|d| d}tt	d	d| d
|ddd}|
t|d  qW d    n1 sOw   Y  t|| jd}	d}
|	D ] }|
d7 }
|d }|d }|j|kstJ t|tdsJ q_|
|ksJ W d    d S 1 sw   Y  d S )Nrg   P   rg   manifest_input.jsonr@   r   r   feat_.ptr  r   順 a b caudio_filepathfeature_filedurationr   r   ri   r   rN   r   )r   TemporaryDirectoryrR   rS   r   r   r]   r   saverandnr   r   dumpsr   rK   shapeequaltensor)r^   r   golden_feat_shapetmpdirr_   fpr2   	feat_fileentryr   cntitemfeat	token_lenr"   r"   r#   !test_feature_to_text_char_datasetf  s,   
"z1TestASRDatasets.test_feature_to_text_char_datasetc              	   C   sP  d}d}t j|dddd}tjd|d}t }t j|d	}t|d
dd5}t|D ](}	t j|d|	 d}
t	
t	dd|
 d|
ddd}|t|d  q-W d    n1 s`w   Y  t||d}d}|D ] }|d7 }|d }|d }|j|ksJ t	|t	dsJ qo||ksJ W d    d S 1 sw   Y  d S )Nrg   r   rw   r   rx   ry   rz   r{   r  r@   r   r   r  r  r  r   r  r  r  r   r   r   rN   r   )rR   rS   r   r   r~   r   r  r   r]   r   r  r  r   r   r  r   r  r  r  )r^   rX   r   r  r   r%   r  r_   r  r2   r  r  r   r  r  r  r  r"   r"   r#    test_feature_to_text_bpe_dataset}  s0   
"z0TestASRDatasets.test_feature_to_text_bpe_datasetc              
   C   s  d}d}t dd}|tj }t }tj|d}t	|ddd}tj|d	}t 
|| tj|d
}	t	|	d}
|
d W d    n1 sLw   Y  d||	ddd}|t|d  tj|d}t 
|| tj|d}	t	|	d}
|
d W d    n1 sw   Y  d||	ddd}|t|d  W d    n1 sw   Y  t|| jd dd}d}|D ]5}|d7 }|d }|d }|j|ksJ t |t dsJ |dkrt ||sJ qt ||sJ q||ksJ W d    d S 1 sw   Y  d S )Nr[   r  r   r  r   r  r@   r   r   	feat_0.ptrttm_0.rttm.SPEAKER <NA> 1 0 1 <NA> <NA> speech <NA> <NA>
r   r  r  r  r	  	rttm_filer
  r   r   	feat_1.ptrttm_1.rttm.SPEAKER <NA> 1 0 0 <NA> <NA> speech <NA> <NA>
T)rK   	normalizeuse_rttmr   rN   r   rg   )r   onesr   ZERO_LEVEL_SPEC_DB_VALr   r  rR   rS   r   r   r  r   r   r  rK   r  r  r  )r^   r   r  samplemasked_sampler  r_   r  r  r"  foutr  r   r  r  r  r  r"   r"   r#   +test_feature_with_rttm_to_text_char_dataset  s^   

"$z;TestASRDatasets.test_feature_with_rttm_to_text_char_datasetc              
   C   s@  t j|dddd}tjd|d}d}d}td	d
}|tj }t	
 }t j|d}	t|	ddd}
t j|d}t|| t j|d}t|d}|d W d    n1 s]w   Y  d||ddd}|
t|d  t j|d}t|| t j|d}t|d}|d W d    n1 sw   Y  d||ddd}|
t|d  W d    n1 sw   Y  t|	|d dd}d}|D ]6}|d7 }|d }|d }|j|ksJ t|tdsJ |dkrt||sJ qt||sJ q||ksJ W d    d S 1 sw   Y  d S ) Nrw   r   rx   ry   rz   r{   r[   r  r  r   r  r@   r   r   r  r  r   r   r  r  r!  r   r#  r$  r%  T)r%   r&  r'  r   rN   r   rg   )rR   rS   r   r   r~   r   r(  r   r)  r   r  r   r  r   r   r  r   r  r  r  )r^   rX   r   r%   r   r  r*  r+  r  r_   r  r  r"  r,  r  r   r  r  r  r  r"   r"   r#   *test_feature_with_rttm_to_text_bpe_dataset  sb   

"$z:TestASRDatasets.test_feature_with_rttm_to_text_bpe_datasetN)__name__
__module____qualname__rK   pytestmarkunitre   rh   rv   with_downloadsr   skipif	HAVE_DALIr   r   r   r   r   r  r  r-  r.  r"   r"   r"   r#   r(   K   sD    


VWHTJ


9r(   c                   @   s4   e Zd ZejjejdddgdefddZdS )TestUtilityFunctionscache_audioFTc              
      s8  d}d}d}d}d}t jj|d}t|| }t }	tj|	dt	 g }
g }t
|D ]f}tjd| }t	| tj|d	}g }|jd
d||fd}t
|D ]3}d| d|dd}tj||}t||dd|f |d |||d|dd || qXt|| |
| q0tj|	d  fdd}tddd ' tjtd| t|
|dd W d   n1 sw   Y  W d   n1 sw   Y  |
}|r||7 }|D ]"}tj tj|}tj||ddsJ d| d | d!qW d   dS 1 sw   Y  dS )"z)Test caching of manifest and audio files.*   rG   r   r[   g      ?)seedstore	manifest_zmanifest.jsong      g      ?)lowhighsize_audio_02dz.wavNfloatztext for example )r  r
  r   cachec                    sP   t jj| jd}t j || _t jt j| jdd t	
| j| j | jS )N)startT)exist_ok)rR   rS   relpath
store_pathr   _local_pathmakedirsdirname
local_pathshutilrq   )r^   object_pathtest_cache_dirtest_store_dirr"   r#   fake_getE  s
   zETestUtilityFunctions.test_cache_datastore_manifests.<locals>.fake_getz9nemo.collections.asr.data.audio_to_text.is_datastore_pathc                 S   s   dS )NTr"   )rA   r"   r"   r#   <lambda>P  s    zETestUtilityFunctions.test_cache_datastore_manifests.<locals>.<lambda>getrN   )r9  r   F)shallowzFiles z and z do not match.)r   randomdefault_rngintr   r  rR   rS   r   mkdirr]   uniformsfr   r   r   r   patchobjectr   r
   rG  filecmpcmp)r^   r9  random_seedrL   num_examplesnum_manifestsdata_duration_rngdata_duration_samplestest_dirmanifest_filepathsaudio_filesr6   manifest_dirrJ   metadatar   r7   r  
audio_filerR  store_files_to_comparef_storef_cacher"   rO  r#   test_cache_datastore_manifests  sb   




 *$z3TestUtilityFunctions.test_cache_datastore_manifestsN)	r/  r0  r1  r2  r3  r4  parametrizeboolro  r"   r"   r"   r#   r8    s    r8  )7rq   r^  r   rR   rM  r   unittestr   r   r   r2  	soundfiler[  
torch.cudar   	omegaconfr   r   torch.utils.datar   nemo.collections.asr.datar   'nemo.collections.asr.data.audio_to_textr   r   r	   r
   ,nemo.collections.asr.data.audio_to_text_dalir   r   r   r   /nemo.collections.asr.data.audio_to_text_datasetr   )nemo.collections.asr.data.feature_to_textr   r   &nemo.collections.asr.models.ctc_modelsr   /nemo.collections.asr.parts.utils.manifest_utilsr   nemo.collections.commonr   #nemo.collections.common.data.lhotser   
nemo.utilsr   r7  ImportErrorModuleNotFoundErrorr$   TokenizerSpecr'   r(   r8  r"   r"   r"   r#   <module>   sJ        G