o
    }oia                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlmZmZm	Z	 d dl
mZmZmZmZ d dlmZ d dlmZ d dlmZ d dlmZ eeZG dd	 d	Ze d
d Ze dd Ze dd Ze dd Ze dd Ze dd Z dS )    N)
DictConfig
ListConfig	OmegaConf)EncDecClassificationModelEncDecCTCModelEncDecRNNTModelEncDecSpeakerLabelModel)asr_module_utils)LinearAdapterConfig)numba_utils)__NUMBA_MINIMUM_VERSION__c                   @   s4  e Zd Zejdejjdd Zejdejjdd Zejdejjdd Z	ejdejjdd	 Z
ejjejdejjd
d Zejdejjdd Zejdejjdd Zejdejjdd Zejdejjdd Zejdejjdd Zdd ZdS )TestExportableGPUc                 C   s   t t | jt | jt | jd}t|d }t :}tj	
|d}|j|dd t|}tjj|dd |jjd jdksCJ |jjd jd	ksNJ W d    d S 1 sYw   Y  d S )
Npreprocessorencoderdecodercfgqn.onnxToutputcheck_trace
full_checkr   audio_signallogprobs)r   r   encoder_dictdecoder_dictr   cudatempfileTemporaryDirectoryospathjoinexportonnxloadcheckercheck_modelgraphinputnamer   )selfmodel_configmodeltmpdirfilename
onnx_model r3   ^/home/ubuntu/.local/lib/python3.10/site-packages/tests/collections/asr/test_asr_exportables.py"test_EncDecCTCModel_export_to_onnx%   s"   

"z1TestExportable.test_EncDecCTCModel_export_to_onnxc                 C   s   |  }t :}tj|d}|j|dd t|}tj	j
|dd |jjd jdks/J |jjd jdks:J W d    d S 1 sEw   Y  d S )Nzedc.onnxTr   r   r   r   logitsr   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r   )r-   speech_classification_modelr/   r0   r1   r2   r3   r3   r4   -test_EncDecClassificationModel_export_to_onnx:   s   

"z<TestExportable.test_EncDecClassificationModel_export_to_onnxc                 C   s   |  }t 9}tj|d}|j|d t|}tj	j
|dd |jjd jdks.J |jjd jdks9J W d    d S 1 sDw   Y  d S )Nzsl.onnxr   Tr   r   r   r6   r7   )r-   speaker_label_modelr/   r0   r1   r2   r3   r3   r4   +test_EncDecSpeakerLabelModel_export_to_onnxH   s   

"z:TestExportable.test_EncDecSpeakerLabelModel_export_to_onnxc                 C   s   |  }t D}tj|d}|j|d t|}tj	j
|dd |jjd jdks.J |jjd jdks9J |jjd jd	ksDJ W d    d S 1 sOw   Y  d S )
Nz
citri.onnxr:   Tr   r   r      lengthr   r7   r-   citrinet_modelr/   r0   r1   r2   r3   r3   r4   'test_EncDecCitrinetModel_export_to_onnxT   s   

"z6TestExportable.test_EncDecCitrinetModel_export_to_onnxc              	   C      |  }t X}tj j 9 tj|d}t	|
 j}tjd|jjd|d}tj|jd fd|d}|j|t||gdd W d    n1 sKw   Y  W d    d S W d    d S 1 scw   Y  d S )	Nz	conf.onnx   	  devicer   size
fill_valuerF   Tr   input_exampler   r   r    r!   torchampautocastr"   r#   r$   next
parametersrF   randnr   _feat_infullshaper%   tuple)r-   conformer_modelr/   r0   r1   rF   rK   input_example_lengthr3   r3   r4   "test_ConformerModel_export_to_onnxa   s   Pz1TestExportable.test_ConformerModel_export_to_onnxc              	   C   rB   )	Nz
squeeze.tsrC   rD   rE   r   rG   TrJ   rL   )r-   squeezeformer_modelr/   r0   r1   rF   rK   rX   r3   r3   r4   &test_SqueezeformerModel_export_to_onnxo   s   Pz5TestExportable.test_SqueezeformerModel_export_to_onnxc              	   C   s   |  }tj|ddd t c}tj j D tj	
|d}|j|dd t|}tjj|dd |jjd jd	ks>J |jjd
 jdksIJ |jjd jdksTJ W d    n1 s^w   Y  W d    d S W d    d S 1 svw   Y  d S )N   F)context_windowupdate_configzcitri_se.onnxTr   r   r   r   r=   r>   r   )r   r	   !change_conv_asr_se_context_windowr    r!   rM   rN   rO   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r   r?   r3   r3   r4   2test_EncDecCitrinetModel_limited_SE_export_to_onnxz   s   
PzATestExportable.test_EncDecCitrinetModel_limited_SE_export_to_onnxc                 C   s  |  }t x}d}tj||}|j|dd\}}tj|d| }|d |ks-J tj|s5J t	|}	tj
j|	dd t|	jjdksLJ t|	jjdksVJ |	jjd jd	ksaJ |	jjd
 jdkslJ |	jjd jdkswJ |	jjd
 jdksJ tj|d| }
|d
 |
ksJ tj|
sJ t	|
}	tj
j|	dd |j }t|d tksJ t|d }t|jj d }t|	jjd
t|d
  | ksJ |	jjd jdksJ |	jjd
 jdksJ |	jjd jdksJ |dkr!t|	jjdd  D ]\}}|jd| d t|d
  ksJ q	t|	jjt|d
 | ks2J |	jjd jdks>J |	jjd
 jdksJJ |dkrqt|	jjdd  D ]\}}|jd| d t|d
  ksoJ qYW d    d S W d    d S 1 sw   Y  d S )Nzcitri_rnnt.onnxF)r   verboseencoder-r   Tr      r   r=   r>   outputsencoded_lengthsdecoder_joint-encoder_outputstargetstarget_length   input__prednet_lengthsoutput_)r   r    r!   r"   r#   r$   r%   existsr&   r'   r(   r)   lenr*   r+   r   r,   r   rK   typerV   listoutput_typeskeys	enumeratestr)r-   citrinet_rnnt_modelr/   r0   fnr1   filesdescrencoder_filenamer2   decoder_joint_filenameinput_examples
num_states
state_nameidxipopr3   r3   r4   #test_EncDecRNNTModel_export_to_onnx   sT   


$
("
(+$z2TestExportable.test_EncDecRNNTModel_export_to_onnxc                 C   s"  |  }t }d}tj||}|j|ddd\}}tj|d| }|d |ks-J tj|s5J tj	
|}	|	d usAJ |	jjjdd  }
|
d jdksSJ |
d jd	ks\J tj|d
| }|d |ksmJ tj|suJ tj	
|}|d usJ |jjjdd  }|j }t|d tksJ t|d }t|jj d }t|dt|d  | ksJ |d jdksJ |d jdksJ |d jdksJ |dkrt|dd  D ]\}}|jd| d t|d  ksJ qW d    d S W d    d S 1 s
w   Y  d S )Nzcitri_rnnt.tsFT)r   ra   r   rb   r   r=   r   r>   rf   rg   rh   ri   rc   rj   rk   rl   rm   )r   r    r!   r"   r#   r$   r%   rp   rM   jitr'   forwardschema	argumentsr,   r   rK   rr   rV   rq   rs   rt   ru   rv   rw   )r-   rx   r/   r0   ry   r1   rz   r{   r|   
ts_encoderr   r}   ts_decoder_jointts_decoder_joint_argsr~   r   r   r   r   r3   r3   r4   !test_EncDecRNNTModel_export_to_ts   sD   

 $&$z0TestExportable.test_EncDecRNNTModel_export_to_tsc                 C   s   t t | jt | jt | jd}|jjd |j_t|d}tt	|jj
jd jdd}|jd|d | }t :}tj|d}|j|d	d
 t|}tjj|d	d |jjd jdkscJ |jjd jdksnJ W d    d S 1 syw   Y  d S )Nr   Adapterr   r       )in_featuresdimtempr   Tr   r   r   r   )r   r   r   r   r   clsr   r   
structuredr
   paramsjasperfiltersadd_adapterr   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r   )r-   r.   r/   adapter_cfgr0   r1   r2   r3   r3   r4   *test_EncDecCTCModel_adapted_export_to_onnx   s.   	


"z9TestExportable.test_EncDecCTCModel_adapted_export_to_onnxc                 C   s`   dt i d| _dddddddgdgdgd	d
dddd
gdd| _dddg ddd| _d S )N>nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessorr   r   +nemo.collections.asr.modules.ConvASREncoder@   reluT   r=           Frg   
r   repeatkernelstridedilationdropoutresidual	separablesese_context_sizefeat_in
activation	conv_maskr   +nemo.collections.asr.modules.ConvASRDecoder   ) abcdefghijklmnopqrstuvwxyz'r   num_classes
vocabulary)dictr   r   r   )r-   r3   r3   r4   setup_method  s6   zTestExportable.setup_methodN)__name__
__module____qualname__pytestmarkrun_only_onunitr5   r9   r<   rA   pleasefixmerY   r[   r`   r   r   r   r   r3   r3   r3   r4   r   $   sB    







	

1
,!r   c                  C   s   dt i d} dddddddgdgdgd	d
dddd
gdd}ddddd}tt| t|t|tdd tdD d}t|d}|S )Nr   r   r   r   r   Tr   r=   r   Frg   r   r   z9nemo.collections.asr.modules.ConvASRDecoderClassification   )r   r   c                 S   s   g | ]	}d  |d qS )zdummy_cls_{}r=   )format.0r   r3   r3   r4   
<listcomp>w  s    z/speech_classification_model.<locals>.<listcomp>)r   r   r   labelsr   )r   r   r   ranger   )r   r   r   modelConfigr/   r3   r3   r4   r8   S  s>   
r8   c                  C   sn   ddi} dddddddgdgdgd	d
d
dgd}dddddgd}t t | t |t |d}t|d}|S )N_target_r   r   r   r   T   r=   r   F)r   r   r   r   r   r   r   r   r   r   r   r   r   z+nemo.collections.asr.modules.SpeakerDecoderrc   	attentionr   )r   r   r   	pool_mode	emb_sizesr   r   )r   r   )r   r   r   r   speaker_modelr3   r3   r4   r;   ~  s8   
r;   c                  C   s   dt i d} ddddddd	gdgdgd
ddddd
dd	dgdgdgdddddddddd	dgdgdgdddddd
dddgdgdgd
ddddd
gdd}dddtdd tddD dd}tt| t|t|d}t|d}|S )Nr   r   r   P   r   Tr   r=      r   Frg   r      rc   皙?
stride_addr   r   r   r   r   r   r   r   r   r   stride_lastresidual_mode     )   r   r   r   c                 s       | ]	}t |d  V  qdS r   Nchrr   r3   r3   r4   	<genexpr>      z!citrinet_model.<locals>.<genexpr>r   r   r   r   r   rs   r   r   r   )r   r   r   r   citri_modelr3   r3   r4   r@     s|   >
r@   c            	      C   s0  t dd tddD } dddd}d	ti d
}dddddddgdgdgdddddd
dddgdgdgdddddddddddgdgdgdddddd
dddgdgdgdddddd
gd}dddddd }d!ddddd"d#}d$d%did&}tt|| t|t|t|t|t|d'}t|d(}|S ))Nc                 s   r   r   r   r   r3   r3   r4   r     r   z&citrinet_rnnt_model.<locals>.<genexpr>r   r   r      i@  )
enc_hiddenpred_hiddenjoint_hiddenr   r   r   r   r   Tr   r=   r   r   Frg   r   r   rc   r   r   r   r   r   r   z(nemo.collections.asr.modules.RNNTDecoder)r   pred_rnn_layersr   )r   prednetz&nemo.collections.asr.modules.RNNTJoint)r   r   r   )r   fuse_loss_werjointnetgreedy_batchmax_symbols)strategygreedy)r   r   model_defaultsr   r   jointdecodingr   )rs   r   r   r   r   )	r   r  r   r   r   r	  r
  r   r   r3   r3   r4   rx     s   <


rx   c                  C   s   dt i d} di dddddd	d
ddddddddd dd dddddddddddgdddddd d!d"d"d#d"d$d}d%dd&td'd( td)d&D d*d}tt| t|t|d+}t|d,}|S )-Nr   r   z-nemo.collections.asr.modules.ConformerEncoderr   r   feat_outrg   n_layersrc   d_modelr   subsamplingstridingsubsampling_factorrC   subsampling_conv_channelsr   	reductionreduction_positionreduction_factorr=   ff_expansion_factorself_attention_modelrel_posn_heads   att_context_sizexscalingTuntie_biasespos_emb_max_len     r   r   )conv_kernel_sizer   dropout_pre_encoderdropout_embdropout_attr   r   c                 s   r   r   r   r   r3   r3   r4   r   o  r   z"conformer_model.<locals>.<genexpr>r   r   r   r   r   r   r   r   r   rW   r3   r3   r4   rW   N  sh   	

rW   c                  C   s   dt i d} di dddddd	d
ddddd dddddddddddddddddgdddddd d!d"d#d"d$d}d%dd&td'd( td)d&D d*d}tt| t|t|d+}t|d,}|S )-Nr   r   z1nemo.collections.asr.modules.SqueezeformerEncoderr   r   r  rg   r  rc   adaptive_scaleTtime_reduce_idxr=   time_recovery_idxr  r   r  dw_stridingr  rC   r  r   r  r  r  r  r  r  r  r  r  r  r  r   r   )r   r   r"  r#  r   r   c                 s   r   r   r   r   r3   r3   r4   r     r   z&squeezeformer_model.<locals>.<genexpr>r   r   r   r   r   r$  r3   r3   r4   rZ   y  sf   	

rZ   )!r"   r    r&   r   
torch.cudarM   	omegaconfr   r   r   nemo.collections.asr.modelsr   r   r   r    nemo.collections.asr.parts.utilsr	   -nemo.collections.common.parts.adapter_modulesr
   nemo.core.utilsr   nemo.core.utils.numba_utilsr   numba_cuda_is_supportedNUMBA_RNNT_LOSS_AVAILABLEr   fixturer8   r;   r@   rx   rW   rZ   r3   r3   r3   r4   <module>   s6   
  1
*
&
K
\
*