o
    }oiV=                     @   sx   d dl Z d dlZd dlmZ d dlmZmZ d dlmZ e 	 dd Z
e 	 dd ZG d	d
 d
ZG dd dZdS )    N)
DictConfig)!EncDecDenoiseMaskedTokenPredModelSpeechEncDecSelfSupervisedModel)	typecheckc                  C   s8  dt dddd} ddd}d	d
dd|d ddgdgdgdddddd
|d ddgdgdgdddddd
|d ddgdgdgdddddd
gdd}dddddd}d|d d|d dddddd
|d ddd
dddd	dd|d d d!d"dd#d$d%d&}tt| t|t|t|t|d'}t|d(}|S ))N>nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor   r   )pad_toditherclsparams       )
enc_hiddendec_out+nemo.collections.asr.modules.ConvASREncoder@   reluTr              F
filtersrepeatkernelstridedilationdropoutresidual	separablesese_context_sizefeat_in
activation	conv_maskjasperz4nemo.collections.asr.modules.MaskedPatchAugmentation      g      ?)_target_
freq_masks
freq_width
patch_sizemask_patchesz9nemo.collections.asr.modules.ConvASRDecoderReconstructionr   )r)   r#   feat_hiddenfeat_outstride_layersnon_stride_layersstride_transposez+nemo.collections.asr.losses.ContrastiveLoss)	r)   in_dimproj_dimcombine_time_stepsquantized_targetscodebook_sizesample_from_same_utterance_onlysample_from_non_maskednum_negatives)decoderloss+nemo.collections.asr.modules.ConvASRDecoder   r)   r#   num_classes#nemo.collections.asr.losses.MLMLossr)   r5   contr)r;   r<   targets_from_loss)rC   mlm)preprocessorspec_augmentmodel_defaultsencoder	loss_listcfg)dictr   r   )rF   rH   rI   rG   loss_list_contr_mlmmodelConfig_contr_mlm	ssl_model rQ   Y/home/ubuntu/.local/lib/python3.10/site-packages/tests/collections/asr/test_ssl_models.pyrP      s   
0
"
	rP   c                  C   s  dddddddddd	} d	| d
 dddddddddddd}d|d dd| d ddgdgdgdddddd
| d ddgdgdgdddddd
| d ddgdgdgdddddd
gdd}dddddd }d!d"d|d ddd#}d$|d | d% | d& | d' d(d| d) | d* d+	}d,| d | d' | d& | d) dd-}d.| d* d/| d& | d) d0}d1d2d3d4gd5d6}t t |t |t | t |t |t |t |t |t |d7	}	t|	d8}
|
S )9Nr   r   r   i>  r   Fpre_conv)	subsampling_factorr   r   sample_rater@   	num_bookscode_dimsqueeze_singlemask_positionr   rU   per_featureg?g{Gz?hannP   i   Tgh㈵>r   )r)   rU   	normalizewindow_sizewindow_stridewindowfeaturesn_fftlogframe_splicingr	   r   	pad_valuer   ra   r   r   r   r   r"   r
   z4nemo.collections.asr.modules.SpectrogramAugmentationr   g?)r)   r*   
time_masksr+   
time_widthz/nemo.collections.asr.modules.RandomBlockMasking(   )r)   
block_size	mask_probr#   freezeallow_overlapz<nemo.collections.asr.modules.RandomProjectionVectorQuantizerrW   rV   r@   l2rX   rT   )	r)   r#   rW   rV   r@   dist_fnrk   rX   r5   z0nemo.collections.asr.modules.MultiSoftmaxDecoder)r)   r#   r@   num_decodersrX   use_biasz(nemo.collections.asr.losses.MultiMLMLossg?)r)   r5   mask_thresholdro   rX   adamwg      @g?g\(\?gMbP?)namelrbetasweight_decay)	rF   rG   rH   masking	quantizerrI   r;   r<   optimrK   )r   r   )rH   rF   rI   rG   rw   rx   r;   r<   ry   model_configrP   rQ   rQ   rR   denoise_mlm_ssl_model   s   0	


r{   c                   @   sL   e Zd Zejjdd Zejjdd Zejjdd Zejjdd Z	d	S )
TestSSLModelc                 C   s$   |  }t|}t|tsJ d S )N)to_config_dictr   from_config_dict
isinstance)selfrP   confdict	instance2rQ   rQ   rR   test_constructor  s   
zTestSSLModel.test_constructorc                 C   s   |  }t|d }|d= d|d d d< t||d< t|d}tjdd	}tjd
ddgd}t  |j||d\}}}}	|	||||	\}
}W d    n1 sSw   Y  t
|dks`J d S )NrJ   rE   FrC   r<   r6   rK         size逻  r   r   lowhighr   input_signalinput_signal_lengthr   r}   rM   r   r   torchrandnrandintno_gradforwarddecoder_loss_steplen)r   rP   modelConfig_contr_nonquantloss_list_contr_nonquantr   lengthspectrograms
spec_masksencodedencoded_len
loss_valueloss_val_dictrQ   rQ   rR   test_contr_nonquant$  s   

z TestSSLModel.test_contr_nonquantc           
      C   s   t jdd}t jdddgd}t   |j||d\}}}}W d    n1 s)w   Y  |||||\}}	t|	dks@J d S )	Nr   r   r   r   r   r   r      )r   r   r   r   r   r   r   )
r   rP   r   r   r   r   r   r   r   r   rQ   rQ   rR   test_contr_mlm=  s   
zTestSSLModel.test_contr_mlmc                 C   s   |  }|d }t|d }d|d ddddd	d
dd|d< d|d ddddd	ddd|d< t||d< t|d}tjdd}tjdddgd}t  |j||d\}}}	}
|	|||	|
\}}W d    n1 spw   Y  t
|dks}J d S )NrH   rJ   r=   r   r>   r?   rA   r   rB   z	encoder.0rC   )r;   r<   output_from_layerrD   mlm_2z	encoder.1mlm_3rK   r   r   r   r   r   r   r   r   )r   rP   modelConfig_contr_mlm_multirH   loss_list_contr_mlm_multir   r   r   r   r   r   r   r   rQ   rQ   rR   test_contr_mlm_multiL  s<   




z!TestSSLModel.test_contr_mlm_multiN)
__name__
__module____qualname__pytestmarkunitr   r   r   r   rQ   rQ   rQ   rR   r|     s    


r|   c                   @   s2   e Zd Zejjdd ZejjdefddZdS )TestDenoiseMLMSSLModelc              	   C   s  t jdd}t jdddgd}dt | }|| }|}t  ) t  |j||||d\}}}	}
W d    n1 s=w   Y  W d    n1 sLw   Y  |d	dksZJ |d
|j	j
jksfJ |d	dksoJ |	d	dksxJ |
d	dksJ |	 dksJ d S )Nr   r   r   r   r   r   皙?)r   r   noisy_input_signalnoisy_input_signal_lengthr   r   r   )r   r   r   	ones_liker   r   disable_checksr   r   rL   rH   r@   sum)r   r{   r   input_lengthnoiser   noisy_input_length	log_probsr   maskstokensrQ   rQ   rR   test_forwardy  s,   

	z#TestDenoiseMLMSSLModel.test_forwardr{   c              	   C   s8  t jdd}t jdddgd}dt | }|| }|}t  * t  |j||||dd	\}}}	}
W d    n1 s>w   Y  W d    n1 sMw   Y  |j|	||
|d
}|	ddksdJ |	d|j
jjkspJ |	ddksyJ |		ddksJ |
	ddksJ |	 dksJ t |rJ d S )Nr   r   r   r   r   r   r   T)r   r   r   r   
apply_mask)r   decoder_outputstargetsdecoder_lengthsr   r   r   )r   r   r   r   r   r   r   r   r<   r   rL   rH   r@   r   isnan)r   r{   r   r   r   r   r   r   r   r   r   r   rQ   rQ   rR   test_forward_masked  s6   


z*TestDenoiseMLMSSLModel.test_forward_maskedN)	r   r   r   r   r   r   r   r   r   rQ   rQ   rQ   rR   r   x  s
    
r   )r   r   	omegaconfr   nemo.collections.asr.modelsr   r   nemo.core.classes.commonr   fixturerP   r{   r|   r   rQ   rQ   rQ   rR   <module>   s   
n
 [