o
    }oiU                     @   sf   d dl Z d dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dl
mZ d dlmZ G dd dZdS )    N)audio_to_text)SemiSortBatchSampler)write_manifestc                   @   s$   e Zd Zg dZejjdd ZdS )TestASRSamplers) abcdefghijklmnopqrstuvwxyz'c                    sb  ddd}dt jj|d dtdtf fdd	}t }g }t t d
dg 	ddd 	dddgD ]}||}t
|D ]3\}}t }	|dd}
ttj||
| tj||
|	d< t| |	d< d|	d< ||	 qFtj|d}t|| tj|| jd}dd |jjjD } 	d
|d dD ]x}t|} 	ddrdnd}tdd
||d|d|d}tjjj|d |d dd  d!}tjjj||d"d  |dd#}tt|t| dksJ d$t| d%t| d&d'\}}zt| W n   d}Y zt| W n   d}Y ||ksJ qq<W d    d S 1 s*w   Y  d S )(Ng?g333330@*   i>  )seednum_examplesreturnc                    sT   t  j| dd}t | t}g }|D ]}| jdd|d q|S )N)lowhighsize   g      g      ?)nprounduniformfloorastypeintappend)r$   data_durationdata_duration_samplessamplesdata_duration_sample_rngdata_max_durationdata_min_durationsample_rate [/home/ubuntu/.local/lib/python3.10/site-packages/tests/collections/asr/test_asr_samplers.pygenerate_samplesF   s   z:TestASRSamplers.test_ssb_sampler.<locals>.generate_samples      r)   
   i  04dz.wavaudio_filepathdurationz	non emptytextzmanifest.json)manifest_filepathlabelsr9   max_durationmin_durationc                 S   s   g | ]}|j qS r:   )rB   ).0sampler:   r:   r;   
<listcomp>m   s    z4TestASRSamplers.test_ssb_sampler.<locals>.<listcomp>      r   TF)global_rank
world_size	durations
batch_sizebatch_shuffle	drop_lastrandomization_factorr#   c                 S      t j| ddS Nr   )pad_idr   _speech_collate_fnr   r:   r:   r;   <lambda>       z2TestASRSamplers.test_ssb_sampler.<locals>.<lambda>)datasetrP   samplerbatch_sampler
collate_fnc                 S   rT   rU   rW   rY   r:   r:   r;   rZ      r[   )r\   rP   r_   rR   shufflez@Different num of batches with batch! Num of batches with ssb is z and without ssb is !)FF)r*   randomdefault_rngr/   listtempfileTemporaryDirectoryconcatenatearrayintegers	enumeratedictsfwriteospathjoinlenr0   r   r   AudioToCharDatasetrE   manifest_processor
collectiondatar   torchutils
DataLoaderabs)selfrandom_seedr<   test_dirmetadatanum_samplesr3   r   rI   metasignal_filenamerD   r\   rO   rP   rR   r]   dataloader_with_ssb
dataloaderdataloader_with_ssb_exceptiondataloader_exceptionr:   r5   r;   test_ssb_sampler;   s   2

$z TestASRSamplers.test_ssb_samplerN)__name__
__module____qualname__rE   pytestmarkunitr   r:   r:   r:   r;   r      s    r   )rn   re   numpyr*   r   	soundfilerl   rv   nemo.collections.asr.datar   -nemo.collections.asr.parts.utils.asr_batchingr   /nemo.collections.asr.parts.utils.manifest_utilsr   r   r:   r:   r:   r;   <module>   s   