o
    }oi,                     @   s   d dl Z d dlZd dlZd dlZd dlmZmZ d dlmZ d dl	m
Z
mZmZ d dlmZ e dd Ze dd	 ZG d
d dZG dd deZdS )    N)
DictConfig
ListConfig)audio_to_label)EncDecClassificationModelEncDecFrameClassificationModelconfigs) assert_dataclass_signature_matchc                  C      dt i d} dddddddgdgdgd	d
dddd
gdd}ddddd}tt| t|t|tdd tdD d}t|d}|S )N>nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessorclsparams+nemo.collections.asr.modules.ConvASREncoder@   reluT               F
filtersrepeatkernelstridedilationdropoutresidual	separablesese_context_sizefeat_in
activation	conv_maskjasperz9nemo.collections.asr.modules.ConvASRDecoderClassification   )r!   num_classesc                 S      g | ]	}d  |d qS zdummy_cls_{}r   format.0i r.   g/home/ubuntu/.local/lib/python3.10/site-packages/tests/collections/asr/test_asr_classification_model.py
<listcomp>B       z/speech_classification_model.<locals>.<listcomp>preprocessorencoderdecoderlabelscfg)dictr   r   ranger   r3   r4   r5   modelConfigmodelr.   r.   r/   speech_classification_model   B   
r>   c                  C   r	   )Nr
   r   r   r   r   Tr   r   r   Fr   r   r    z2nemo.collections.common.parts.MultiLayerPerceptron   )hidden_sizer&   c                 S   r'   r(   r)   r+   r.   r.   r/   r0   p   r1   z.frame_classification_model.<locals>.<listcomp>r2   r7   )r9   r   r   r:   r   r;   r.   r.   r/   frame_classification_modelI   r?   rB   c                   @   s\   e Zd Zejjdd Zejjdd Zejjdd Zejjdd Z	ejjd	d
 Z
dS )TestEncDecClassificationModelc           	      C   sR   |  }d}d}d}|| | }|j|ksJ | }t|}t|ts'J d S )Ni     i  )trainnum_weightsto_config_dictr   from_config_dict
isinstance)	selfr>   	asr_modelconv_cntbn_cntdec_cntparam_countconfdict	instance2r.   r.   r/   test_constructorx   s   
z.TestEncDecClassificationModel.test_constructorc           
      C   s  |  }d|jj_d|jj_tjdd}tjdddgd}t 9 g }t	|
dD ]}|j|||d	  |||d	  d
}|| q+t|d}|j||d
}W d    n1 s]w   Y  |j|jksjJ tt|| }	|	dkszJ tt|| }	|	dksJ d S )Nr   r   )   i   )size   i  rS   )lowhighrT   r   )input_signalinput_signal_lengthgư>)evalr3   
featurizerditherpad_totorchrandnrandintno_gradr:   rT   forwardappendcatshapemeanabsmax)
rJ   r>   rK   rX   lengthlogprobs_instancer-   logprobs_inslogprobs_batchdiffr.   r.   r/   test_forward   s(   


z*TestEncDecClassificationModel.test_forwardc                 C   s   |  }t|jj}|j}|j|d ||jksJ t|}|d |d |d |j|d |j|d|jj	d   ksDJ d S )N)
new_labelsdummy_cls_31dummy_cls_32dummy_cls_33   r   )
rE   copydeepcopy_cfgr6   rF   change_labelsrc   r5   _feat_in)rJ   r>   rK   
old_labelsnw1ro   r.   r.   r/   test_vocab_change   s   



"z/TestEncDecClassificationModel.test_vocab_changec                    s  ddg} fdd|D }|  }|j|dd}t|dks J |d jtdgks-J d	g|j_|j|dd}t|dksAJ |d jtd	gksNJ dd	g|j_|j|dd}t|dkscJ |d jtddgksqJ |d jtdd	gksJ |jjdd	gksJ dg|j_|j|dd
d}t|dksJ |d jtt|jj	gksJ d	g|j_|j|dd
d}t|dksJ |d jtt|jj	gksJ d S )Nzan22-flrp-b.wavzan90-fbbh-b.wavc              
      s"   g | ]}t j d ddd|qS )asrrE   an4wav)ospathjoin)r,   fptest_data_dirr.   r/   r0      s   " zDTestEncDecClassificationModel.test_transcription.<locals>.<listcomp>   )
batch_sizer   r   r@   T)r   logprobs)
rZ   
transcribelenre   r^   Size	_accuracytop_kr8   r6   )rJ   r>   r   audio_filenamesaudio_pathsr=   resultsr.   r   r/   test_transcription   s0   

"
&z0TestEncDecClassificationModel.test_transcriptionc                 C   R   g d}ddi}t tjtj||d}|\}}}|sJ |d u s!J |d u s'J d S )N)	is_tarrednum_workersr   tarred_audio_filepathsshuffle
pin_memory	drop_lasttarred_shard_strategy	shuffle_nr[   
vad_stream
int_valuessample_ratenormalize_audio	augmentorbucketing_batch_sizebucketing_strategybucketing_weightstrim_silencetrimignore_args
remap_args)r   r   AudioToSpeechLabelDatasetr   !EncDecClassificationDatasetConfigrJ   IGNORE_ARGS
REMAP_ARGSresultsignatures_match
cls_subsetdataclass_subsetr.   r.   r/   Dtest_EncDecClassificationDatasetConfig_for_AudioToSpeechLabelDataset   s   
zbTestEncDecClassificationModel.test_EncDecClassificationDatasetConfig_for_AudioToSpeechLabelDatasetN)__name__
__module____qualname__pytestmarkunitrR   rn   r{   r   r   r.   r.   r.   r/   rC   w   s    



&rC   c                   @   sB   e Zd Zejddgg dejjdd Zejjdd ZdS )	"TestEncDecFrameClassificationModel
logits_len
labels_len))   
   )   r   )   r   )r   	   )r      c           	      C   s   |  }td|d}td|}tg d}tg d}|j||||d\}}|d|dks6J t|tg dsCJ d S )NrS   r   )         r   )r@   r   r   r   )logitsr6   r   r   r   )rZ   r^   onestensorreshape_labelsrT   equal)	rJ   rB   r   r   r=   r   r6   
labels_newlabels_len_newr.   r.   r/   test_reshape_labels  s   
z6TestEncDecFrameClassificationModel.test_reshape_labelsc                 C   r   )N)r   r   r   r   r   r   r   r   r   r[   r   r   r   r   r   r   r   r   	delimiternormalize_audio_dbnormalize_audio_db_targetwindow_length_in_secshift_length_in_secr   r   r   )r   r   AudioToMultiLabelDatasetr   r   r   r.   r.   r/   Itest_EncDecClassificationDatasetConfig_for_AudioToMultiSpeechLabelDataset  s   
zlTestEncDecFrameClassificationModel.test_EncDecClassificationDatasetConfig_for_AudioToMultiSpeechLabelDatasetN)	r   r   r   r   r   parametrizer   r   r   r.   r.   r.   r/   r     s    r   )rt   r   r   r^   	omegaconfr   r   nemo.collections.asr.datar   nemo.collections.asr.modelsr   r   r   nemo.utils.config_utilsr   fixturer>   rB   rC   r   r.   r.   r.   r/   <module>   s   
-
- 