o
    }oi2                     @   s   d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	m
Z
mZ d dlm  mZ d dlmZ d dlmZ d dlmZmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZm Z  e! dd Z"G dd dZ#dS )    N)CutSetMonoCut)DummyManifest)
DictConfig	OmegaConf	open_dict)audio_to_text)LhotseSpeechToTextBpeDataset)EncDecCTCModelconfigs)CTCDecodingCTCDecodingConfig)
Hypothesis)make_parser) assert_dataclass_signature_matchupdate_model_configc                  C   sr   ddi} dddddddgdgdgd	d
dddd
gd}dddg dd}t t | t |t |d}t|d}|S )N_target_z>nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessorz+nemo.collections.asr.modules.ConvASREncoder@   reluT              F)
filtersrepeatkernelstridedilationdropoutresidual	separablesese_context_size)r   feat_in
activation	conv_maskjasperz+nemo.collections.asr.modules.ConvASRDecoder   ) abcdefghijklmnopqrstuvwxyz')r   r#   num_classes
vocabulary)preprocessorencoderdecodercfg)r   r
   )rF   rG   rH   modelConfigmodel_instance rM   b/home/ubuntu/.local/lib/python3.10/site-packages/tests/collections/asr/test_asr_ctcencdec_model.py	asr_model    s8   #
rO   c                   @   s   e Zd Zejjdd Zejjdd Zejjdd Zejjdd Z	ejjd	d
 Z
ejjdd Zejjdd Zejjdd Zejjdd Zejjdd ZdS )TestEncDecCTCModelc                 C   s,   |   | }t|}t|tsJ d S )N)trainto_config_dictr
   from_config_dict
isinstance)selfrO   confdict	instance2rM   rM   rN   test_constructord   s   
z#TestEncDecCTCModel.test_constructorc           
      C   s0  |  }d|jj_d|jj_tjdd}tjdddgd}t E g }t	|
dD ]#}|j|||d	  |||d	  d
\}}}|| tt| q+t|d}|j||d
\}}}W d    n1 siw   Y  |j|jksvJ tt|| }	|	dksJ tt|| }	|	dksJ d S )Nr   r   )   i   )size   i  rY   )lowhighrZ   r   )input_signalinput_signal_lengthgư>)evalrF   
featurizerditherpad_totorchrandnrandintno_gradrangerZ   forwardappendprintlencatshapemeanabsmax)
rU   rO   r^   lengthlogprobs_instancer1   logprobs_ins_logprobs_batchdiffrM   rM   rN   test_forwardm   s*   



zTestEncDecCTCModel.test_forwardc                 C   s   g d}|  }ttdddd}tt|ddd}|| }||d}t|dks+J t|d dks5J t|d d ts@J t|d d t	sKJ d S )	N)r(   r)   r*   r+   r   r   T)begin_idend_id	with_data)labels)	tokenizerreturn_cuts   )
r`   r   r   r	   r   predict_steprl   rT   r   r   )rU   rO   
token_listcutsdatasetbatchoutputsrM   rM   rN   test_predict_step   s   z$TestEncDecCTCModel.test_predict_stepc                 C   s   t |jj}|j}|j|d ||jksJ t |}|d |d |d |j|d |j|d|jjd   ks@J d S )N)new_vocabulary!$@   r   )copydeepcopyrH   rE   num_weightschange_vocabularyrj   _feat_in)rU   rO   	old_vocabnw1	new_vocabrM   rM   rN   test_vocab_change   s   



"z$TestEncDecCTCModel.test_vocab_changec                 C   s   |j d usJ t|j tsJ |j jjdksJ |j jdu s J |j jdu s(J tddd}|| |j jdu s;J |j jdu sCJ d S )Ngreedy_batchFT)preserve_alignmentscompute_timestamps)	decodingrT   r   rJ   strategyr   r   r   change_decoding_strategy)rU   rO   rJ   rM   rM   rN   test_decoding_change   s   
z'TestEncDecCTCModel.test_decoding_changec                 C   s   t |j}|jdd |j}|jjd jdksJ |jjd jdks%J |j D ]\}}t|j	j
dkr=|jdks=J q*d S )N    )context_windowr   r   SqueezeExciter   r   rJ   !change_conv_asr_se_context_windowrG   r&   r"   named_modulestype	__class____name__r   rU   rO   old_cfg
new_confignamer5   rM   rM   rN   &test_change_conv_asr_se_context_window   s   z9TestEncDecCTCModel.test_change_conv_asr_se_context_windowc                 C   s   t |j}|jddd |j}|jjd jdksJ |jjd jdks&J |j D ]\}}t|j	j
dkr>|jdks>J q+d S )Nr   F)r   update_configr   r   r   r   r   rM   rM   rN   7test_change_conv_asr_se_context_window_no_config_update   s   zJTestEncDecCTCModel.test_change_conv_asr_se_context_window_no_config_updatec                 C   s  t  }|jj}||j_d|jj_d|jj_t	j
jjdddgdgdgddddd		g|jj_d|jj_d
|jj_||jj_td|ji}t||}t|jd}|j|jksWJ d|jvs^J d|jvseJ d|jvslJ d|jvssJ td|ji}t||dd}d|jv sJ d|jv sJ d|jv sJ d|jv sJ t|j |jd |jd |jd W d    n1 sw   Y  t|jd}|j|jksJ d S )Nr   r   r   r   r   FTr   )	r   r   r   r   r   r   r   r!   r"   r'   modelrI   train_dsvalidation_dstest_dsoptim)drop_missing_subconfigs)r   EncDecCTCModelConfigrH   rE   r   r|   rG   r$   r#   nemo_asrmodulesconv_asrJasperEncoderConfigr&   rD   r   createrJ   r   r
   r   r   pop)rU   rO   	model_cfgrE   asr_cfgmodel_cfg_v1	new_modelmodel_cfg_v2rM   rM   rN   test_dataclass_instantiation   sR   






z/TestEncDecCTCModel.test_dataclass_instantiationc                 C   sR   g d}ddi}t tjtj||d}|\}}}|sJ |d u s!J |d u s'J d S )N)	is_tarrednum_workers
batch_sizetarred_audio_filepathsshuffle
pin_memory	drop_lasttarred_shard_strategyshard_manifests	shuffle_nuse_start_end_tokenr   bucketing_batch_sizebucketing_strategybucketing_weightschannel_selector
use_lhotsetarred_random_accessuse_bucketingbatch_durationquadratic_durationbucket_batch_sizebucket_duration_binsnum_bucketsr   trim_silencetrimignore_args
remap_args)r   r   AudioToCharDatasetr   ASRDatasetConfigrU   IGNORE_ARGS
REMAP_ARGSresultsignatures_match
cls_subsetdataclass_subsetrM   rM   rN   ,test_ASRDatasetConfig_for_AudioToCharDataset  s   
z?TestEncDecCTCModel.test_ASRDatasetConfig_for_AudioToCharDatasetc                 C   sX   g d}ddddd}t tjtj||d}|\}}}|sJ |d u s$J |d u s*J d S )N)r   r   r   r   r   r   global_rank
world_sizer   r   r   r   max_uttsr   r   r   r   r   r   r   r   r   r   audio_tar_filepathsshard_strategyr   )r   r   r   r   r   )r   r   TarredAudioToCharDatasetr   r   r   rM   rM   rN   2test_ASRDatasetConfig_for_TarredAudioToCharDataset@  s    
zETestEncDecCTCModel.test_ASRDatasetConfig_for_TarredAudioToCharDatasetN)r   
__module____qualname__pytestmarkunitrX   rx   r   r   r   r   r   r   r   r   rM   rM   rM   rN   rP   c   s*    







C
,rP   )$r   r   rd   lhotser   r   lhotse.testing.dummiesr   	omegaconfr   r   r   nemo.collections.asrcollectionsasrr   nemo.collections.asr.datar   .nemo.collections.asr.data.audio_to_text_lhotser	   nemo.collections.asr.modelsr
   r   2nemo.collections.asr.parts.submodules.ctc_decodingr   r   +nemo.collections.asr.parts.utils.rnnt_utilsr   3nemo.collections.common.parts.preprocessing.parsersr   nemo.utils.config_utilsr   r   fixturerO   rP   rM   rM   rM   rN   <module>   s"   
B