o
    }oiGC                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlZ	d dl
Z
d dlZd dlZd dlmZ d dlmZ e
jddgddd Ze
 d	d
 Ze
 dd Ze
 dd Ze
 dd Ze
 dd ZG dd dZG dd dZG dd dZdS )    N
DictConfig)EncMaskDecAudioToAudioModelnemo_manifestlhotse_cuts)paramsc              
   C   s  d}d}t |D ]}tj|d}t| d| d |d q|jdkrstj	| d =}t |D ]%}tj
| d| d }tjd| dd|d |d	|id
}|| q1t| d ddddW  d    S 1 slw   Y  d S |jdkr| d d3}	t |D ]&}t| d| d t| d| d |d dd}
|	t|
 d qW d    n1 sw   Y  t| d ddddddS td|j d)N   >     audio_z.wavr   z
cuts.jsonlr   target_recording)idstartchannelduration	recordingcustomT   )	cuts_path
use_lhotse
batch_sizenum_workersr   zsmall_manifest.jsonlw)noisy_filepathclean_filepathr   offset
r   r   F)manifest_filepath	input_key
target_keyr   r   r   zDataset type z not implemented)rangenprandomrandnsfwriteparamlhotseCutSetopen_writer	Recording	from_fileMonoCutstropenjsondumpsNotImplementedError)tmp_pathrequest	num_filesnum_samplesidatawriterr   cutfentry r<   b/home/ubuntu/.local/lib/python3.10/site-packages/tests/collections/audio/test_audio_models_mask.pymock_dataset_config   sV   

$


	r>   c                  C   s   dddd} dddd}d	|d
 |d d}d| d |d
 d d dddd}ddd}ddd}t | d | d t |t |t |t |t |ddddd}|S )Nr	   r
   Tsample_ratenum_outputsnormalize_input<nemo.collections.audio.modules.transforms.AudioToSpectrogram      _target_
fft_length
hop_length<nemo.collections.audio.modules.transforms.SpectrogramToAudiorH   rI   z7nemo.collections.audio.modules.masking.MaskEstimatorRNNrA   r      )rG   rA   num_subbandsnum_features
num_layersbidirectional;nemo.collections.audio.modules.masking.MaskReferenceChannelr   rG   ref_channel%nemo.collections.audio.losses.SDRLossrG   scale_invariantr@   adamMbP?g?g\(\?)namelrbetasr@   rA   encoderdecodermask_estimatormask_processorlossoptimr   modelr]   r^   r_   r`   ra   model_configr<   r<   r=   mask_model_rnn_paramsR   sP   	rf   c                 C   sF   t j  t jd t| d}W d    |S 1 sw   Y  |S )Nr   cfg)torchr"   fork_rngmanual_seedr   )rf   rd   r<   r<   r=   mask_model_rnn   s   
rl   c              	   C   s   i |ddi| d< i |ddi| d< dddddd	d
dd}|| d< t jdi |}tj  tjd t| |d}W d    ||fS 1 sJw   Y  ||fS )NshuffleTtrain_dsFvalidation_dsr   r      cpu)
max_epochs	max_stepsloggeruse_distributed_samplerval_check_intervallimit_train_batchesacceleratorenable_checkpointingtrainerr   )rh   r{   r<   )plTrainerri   r"   rj   rk   r   )rf   r>   trainer_cfgr{   rd   r<   r<   r=   ,mask_model_rnn_with_trainer_and_mock_dataset   s(   

r   c                  C   s   dddd} dddd}d	|d
 |d d}d| d |d
 d d ddddddddd dddd}ddd}ddd}t | d | d t |t |t |t |t |dd d!d"d#}t|d$} | S )%Nr	   r
   Tr?   rC   rD   rE   rF   rJ   rH   rI   z@nemo.collections.audio.modules.masking.MaskEstimatorFlexChannelsrA   r   rK   averagetransform_average_concatenateconformer_encoder   rq      mean_varmean)rG   rA   rL   
num_blockschannel_reduction_positionchannel_reduction_typechannel_block_typetemporal_block_typetemporal_block_num_layerstemporal_block_num_headstemporal_block_dimensionmag_reductionmag_normalizationuse_ipdipd_normalizationrP   r   rQ   rS   rT   r@   AdamrW   rX   )	optimizerrZ   r[   r\   rg   )r   r   rc   r<   r<   r=   mask_model_flexarray   sd   
r   c                 C   s<   |   }ddddddddd|d	 d
 d
|d< t|d}|S )Nz:nemo.collections.audio.modules.masking.MaskBasedBeamformerpmwfg        onebanmax_snrr
   Fr_   rL   )
rG   filter_typefilter_betafilter_rankfilter_postfilterrR   ref_hardref_hard_use_gradref_subband_weightingrL   r`   rg   )to_config_dictr   )r   re   rd   r<   r<   r=   bf_model_flexarray   s   


r   c                   @   sR   e Zd ZdZejjdd Zejjejdg ddd Z	dd	 Z
d
d ZdS )TestMaskModelRNNz+Test masking model with RNN mask estimator.c                 C   ,   |  }| }t|}t|tsJ dS z:Test that the model can be constructed from a config dict.Ntrainr   r   from_config_dict
isinstance)selfrl   rd   confdict	instance2r<   r<   r=   test_constructor     
z!TestMaskModelRNN.test_constructorzbatch_size, sample_len))rq   rq   )r   r   )r
   
   c                 C   sN  |  }| }|d }t }|d tj|d|| f|d}|| tj|tjd }	d}
t J g }g }t	|
dD ]!}|j|||d  |	||d  d\}}|| || q?t|d}t|d}|j||	d\}}W d   n1 sw   Y  |j|jksJ |j|jksJ tt|| }||
ksJ dS )	.Test that the model can run forward inference.r@   r   r
   size	generatordtypeh㈵>input_signalinput_lengthNevalr   ri   	Generatorrk   r#   onesintno_gradr    r   forwardappendcatshapemaxabs)r   rl   r   
sample_lenrd   r   sampling_raterngr   input_signal_lengthabs_toloutput_listoutput_length_listr6   outputoutput_lengthoutput_instanceoutput_length_instanceoutput_batchoutput_length_batchdiffr<   r<   r=   test_forward_infer  s6   



z#TestMaskModelRNN.test_forward_inferc           
      C   s   |\}}|  }t|jdD ]G}t|tr%|d }|d }|d|}n|\}}}}|jdkr6t	|d}|jdkrAt	|d}|j
||d\}}|j|||d}	|	  qd S )Nr   r   r   target_signalzB T -> B 1 Tr   )estimatetargetr   )r   	itertoolsislice	_train_dlr   dictgetndimeinops	rearranger   ra   backward)
r   r   rd   _batchr   r   r   output_signalra   r<   r<   r=   test_training_stepA  s    



z#TestMaskModelRNN.test_training_stepc                 C   s   |\}}|  }|| dS )zj
        Test that the model can be trained for a few steps. An evaluation step is also expected.
        N)r   fit)r   r   rd   r{   r<   r<   r=   test_model_trainingY  s   z$TestMaskModelRNN.test_model_trainingN)__name__
__module____qualname____doc__pytestmarkunitr   parametrizer   r   r   r<   r<   r<   r=   r     s    
%r   c                   @   B   e Zd ZdZejjdd Zejjejdg ddd Z	dS )	TestMaskModelFlexArrayz8Test masking model with channel-flexible mask estimator.c                 C   r   r   r   )r   r   rd   r   r   r<   r<   r=   r   e  r   z'TestMaskModelFlexArray.test_constructor$batch_size, num_channels, sample_len)rq   r
   rq   )r   r
   r   )r
   r
   r   )rq   rK   rq   )r   rK   r   )r
   rK   r   c                 C   N  |  }| }|d }t }|d tj|||| f|d}	|| tj|tjd }
d}t J g }g }t	|	
dD ]!}|j|	||d  |
||d  d\}}|| || q?t|d}t|d}|j|	|
d\}}W d   n1 sw   Y  |j|jksJ |j|jksJ tt|| }||ksJ dS 	r   r@   r   r   r   r   r
   r   Nr   )r   r   r   num_channelsr   rd   r   r   r   r   r   r   r   r   r6   r   r   r   r   r   r   r   r<   r<   r=   r   m  6   



z)TestMaskModelFlexArray.test_forward_inferN
r   r   r   r   r   r   r   r   r   r   r<   r<   r<   r=   r   b      
r   c                   @   r   )	TestBFModelFlexArrayz<Test beamforming model with channel-flexible mask estimator.c                 C   r   r   r   )r   r   rd   r   r   r<   r<   r=   r     r   z%TestBFModelFlexArray.test_constructorr   r   c                 C   r   r   r   )r   r   r   r   r   rd   r   r   r   r   r   r   r   r   r6   r   r   r   r   r   r   r   r<   r<   r=   r     r   z'TestBFModelFlexArray.test_forward_inferNr   r<   r<   r<   r=   r     r   r   )	importlibr   r/   r   r'   lightning.pytorchpytorchr|   numpyr!   r   	soundfiler$   ri   	omegaconfr   nemo.collections.audio.modelsr   fixturer>   rf   rl   r   r   r   r   r   r   r<   r<   r<   r=   <module>   s6   
2
7


B
Z=