o
    }oi6e                     @   s&  d dl Z d dlZd dlZd dlZd dlmZ d dlZd dl	Z	d dl
Zd dlZd dlmZ d dlmZ e	jddgddd Ze	 d	d
 Ze	 dd Ze	 dd Ze	 dd Ze	jdd Ze	 dd Ze	 dd ZG dd dZG dd dZG dd dZG dd dZdS )    N
DictConfig)PredictiveAudioToAudioModelnemo_manifestlhotse_cuts)paramsc              
   C   s  d}d}t |D ]}tj|d}t| d| d |d q|jdkrstj	| d =}t |D ]%}tj
| d| d }tjd| dd|d |d	|id
}|| q1t| d ddddW  d    S 1 slw   Y  d S |jdkr| d d3}	t |D ]&}t| d| d t| d| d |d dd}
|	t|
 d qW d    n1 sw   Y  t| d ddddddS td|j d)N   >     audio_z.wavr   z
cuts.jsonlr   target_recording)idstartchannelduration	recordingcustomT   )	cuts_path
use_lhotse
batch_sizenum_workersr   zsmall_manifest.jsonlw)noisy_filepathclean_filepathr   offset
r   r   F)manifest_filepath	input_key
target_keyr   r   r   zDataset type z not implemented)rangenprandomrandnsfwriteparamlhotseCutSetopen_writer	Recording	from_fileMonoCutstropenjsondumpsNotImplementedError)tmp_pathrequest	num_filesnum_samplesidatawriterr   cutfentry r<   h/home/ubuntu/.local/lib/python3.10/site-packages/tests/collections/audio/test_audio_models_predictive.pymock_dataset_config   sV   

$


	r>   c                  C   s   ddddd} dddd	d
d}d|d |d |d |d d}dddg ddddd}ddi}t | d | d | d | d t |t |t |t |dddd d!	}tj  tjd t|d"} W d    | S 1 smw   Y  | S )#Nr	   r
   T2   sample_ratenum_outputsnormalize_inputmax_utts_evaluation_metrics<nemo.collections.audio.modules.transforms.AudioToSpectrogram           ?Q?_target_
fft_length
hop_lengthmagnitude_powerscale<nemo.collections.audio.modules.transforms.SpectrogramToAudiorL   rM   rN   rO   z^nemo.collections.audio.parts.submodules.ncsnpp.SpectrogramNoiseConditionalScoreNetworkPlusPlus)r   r   r   r   r      @   r   )rK   in_channelsout_channelschannelsnum_res_blockspad_time_topad_dimension_torK   %nemo.collections.audio.losses.MSELossrA   rB   rC   rD   AdamMbP?g?g\(\?	optimizerlrbetas	rA   rB   rC   rD   encoderdecoder	estimatorlossoptimcfgr   torchr"   fork_rngmanual_seedr   modelrb   rc   rd   re   model_configr<   r<   r=   predictive_model_ncsnP   sb   
rp   c                  C   s   ddddd} dddd	d
d}d|d |d |d |d d}dddddddddd ddddgdd}ddi}t | d | d | d | d  t |t |t |t |d!d"d#d$d%	}tj  tjd& t|d'} W d    | S 1 stw   Y  | S )(Nr	   r
   Tr?   r@   rE   rF   rG   rH   rI   rJ   rP   rL   rM   rN   rO   Fnemo.collections.audio.parts.submodules.conformer.SpectrogramConformer   r   rR   rel_pos
layer_normFregularrK   rS   rT   feat_inn_layersd_modelsubsampling_factorself_attention_modeln_headsconv_context_sizeconv_norm_typecausal_downsamplingatt_context_sizeatt_context_stylerK   rY   rA   rB   rC   rD   rZ   r[   r\   r]   ra   r   rg   ri   rm   r<   r<   r=   predictive_model_conformer   p   
r   c                  C   s   ddddd} dddd	d
d}d|d |d |d |d d}ddddddddddddddgdd}ddi}t | d | d | d  | d! t |t |t |t |d"d#d$d%d&	}tj  tjd' t|d(} W d    | S 1 stw   Y  | S ))Nr	   r
   Tr?   r@   rE   rF   rG   rH   rI   rJ   rP   rL   rM   rN   rO   rq   rr   r   rR   rs   causalrt   f      chunked_limitedrw   rK   rY   rA   rB   rC   rD   rZ   r[   r\   r]   ra   r   rg   ri   rm   r<   r<   r=   $predictive_model_streaming_conformer   r   r   c                  C   s   ddddd} dddd	d
d}d|d |d |d |d d}ddddddddd}ddi}t | d | d | d | d t |t |t |t |dddd d!	}|S )"Nr	   r
   Tr?   r@   rE   rF   rG   rH   rI   rJ   rP   rL   rM   rN   rO   zRnemo.collections.audio.parts.submodules.transformerunet.SpectrogramTransformerUNetrr   r   rR   F)rK   rS   rT   freq_dimdepthdimheadsadaptive_rmsnormrK   rY   rA   rB   rC   rD   adamr[   r\   )namer_   r`   ra   r   rm   r<   r<   r=   -predictive_model_transformer_unet_params_base  sX   r   c                 C   s`   t |di }| D ]#\}}|| v r)t| | tr)| D ]
\}}|| | |< qq
|| |< q
| S )Nr&   )getattritems
isinstancer   )r   r3   	overridessectionvalueskvr<   r<   r=   (predictive_model_transformer_unet_paramsX  s   

r   c                 C   sF   t j  t jd t| d}W d    |S 1 sw   Y  |S )Nr   rg   )rj   r"   rk   rl   r   )r   rn   r<   r<   r=   !predictive_model_transformer_unetg  s   
r   c              	   C   s   i |ddi| d< i |ddi| d< dddddd	d
dd}|| d< t jdi |}tj  tjd t| |d}W d    ||fS 1 sJw   Y  ||fS )NshuffleTtrain_dsFvalidation_dsru   r   r      cpu)
max_epochs	max_stepsloggeruse_distributed_samplerval_check_intervallimit_train_batchesacceleratorenable_checkpointingtrainerr   )rh   r   r<   )plTrainerrj   r"   rk   rl   r   )r   r>   trainer_cfgr   rn   r<   r<   r=   ?predictive_model_transformer_unet_with_trainer_and_mock_datasetp  s(   

r   c                   @   B   e Zd ZdZejjdd Zejjejdg ddd Z	dS )	TestPredictiveModelNCSNz*Test predictive model with NCSN estimator.c                 C   ,   |  }| }t|}t|tsJ dS z:Test that the model can be constructed from a config dict.Ntrainto_config_dictr   from_config_dictr   )selfrp   rn   confdict	instance2r<   r<   r=   test_constructor     
z(TestPredictiveModelNCSN.test_constructorbatch_size, sample_lenr   r   )r   r   )r
   
   c                 C   N  |  }| }|d }t }|d tj|d|| f|d}|| tj|tjd }	d}
t J g }g }t	|
dD ]!}|j|||d  |	||d  d\}}|| || q?t|d}t|d}|j||	d\}}W d   n1 sw   Y  |j|jksJ |j|jksJ tt|| }||
ksJ dS 	z.Test that the model can run forward inference.rA   r   r
   size	generatordtypeg-C6
?input_signalinput_lengthNevalr   rj   	Generatorrl   r#   onesintno_gradr    r   forwardappendcatshapemaxabs)r   rp   r   
sample_lenrn   r   sampling_raterngr   input_signal_lengthabs_toloutput_listoutput_length_listr6   outputoutput_lengthoutput_instanceoutput_length_instanceoutput_batchoutput_length_batchdiffr<   r<   r=   test_forward_infer  6   



z*TestPredictiveModelNCSN.test_forward_inferN
__name__
__module____qualname____doc__pytestmarkunitr   parametrizer   r<   r<   r<   r=   r         
r   c                   @   r   )	TestPredictiveModelConformerz/Test predictive model with conformer estimator.c                 C   r   r   r   )r   r   rn   r   r   r<   r<   r=   r     r   z-TestPredictiveModelConformer.test_constructorr   r   c                 C   r   r   r   )r   r   r   r   rn   r   r   r   r   r   r   r   r   r6   r   r   r   r   r   r   r   r<   r<   r=   r     r   z/TestPredictiveModelConformer.test_forward_inferNr   r<   r<   r<   r=   r     r   r   c                   @   r   )	%TestPredictiveModelStreamingConformerz9Test predictive model with streaming conformer estimator.c                 C   r   r   r   )r   r   rn   r   r   r<   r<   r=   r     r   z6TestPredictiveModelStreamingConformer.test_constructorr   r   c                 C   r   r   r   )r   r   r   r   rn   r   r   r   r   r   r   r   r   r6   r   r   r   r   r   r   r   r<   r<   r=   r     r   z8TestPredictiveModelStreamingConformer.test_forward_inferNr   r<   r<   r<   r=   r     r   r   c                   @   s   e Zd ZdZejjdd Zejjejdg ddd Z	ejjejddgejjd	d
ddiigdddd Z
dd Zdd ZdS )"TestPredictiveModelTransformerUNetz6Test predictive model with transformer_unet estimator.c                 C   r   r   r   )r   r   rn   r   r   r<   r<   r=   r   >  r   z3TestPredictiveModelTransformerUNet.test_constructorr   r   c                 C   r   r   r   )r   r   r   r   rn   r   r   r   r   r   r   r   r   r6   r   r   r   r   r   r   r   r<   r<   r=   r   F  r   z5TestPredictiveModelTransformerUNet.test_forward_inferr   r   rd   r   T)indirectc              	   C   s   |  }| }|d }t }|d tj|d|| f|d}|| tj|tjd }	t	t
. t  |j||	d\}
}
W d   n1 sKw   Y  W d   dS W d   dS 1 scw   Y  dS )zKTest that the predictive model raises TypeError when adaptive RMS turned onrA   r   r
   r   r   r   N)r   r   rj   r   rl   r#   r   r   r   raises	TypeErrorr   r   )r   r   r   r   rn   r   r   r   r   r   _r<   r<   r=   test_adaptive_rms_ebabled_failst  s   

"zBTestPredictiveModelTransformerUNet.test_adaptive_rms_ebabled_failsc           
      C   s   |\}}|  }t|jdD ]G}t|tr%|d }|d }|d|}n|\}}}}|jdkr6t	|d}|jdkrAt	|d}|j
||d\}}|j|||d}	|	  qd S )Nr   r   r   target_signalzB T -> B 1 Tr   )estimatetargetr   )r   	itertoolsislice	_train_dlr   dictgetndimeinops	rearranger   re   backward)
r   r   rn   r   batchr   r   r   output_signalre   r<   r<   r=   test_training_step  s    



z5TestPredictiveModelTransformerUNet.test_training_stepc                 C   s   |\}}|  }|| dS )zj
        Test that the model can be trained for a few steps. An evaluation step is also expected.
        N)r   fit)r   r   rn   r   r<   r<   r=   test_model_training  s   z6TestPredictiveModelTransformerUNet.test_model_trainingN)r   r   r   r   r   r   r   r   r   r   r   r  r  r<   r<   r<   r=   r   ;  s,    
%r   )r   r/   r   r'   lightning.pytorchpytorchr   numpyr!   r   	soundfiler$   rj   	omegaconfr   nemo.collections.audio.modelsr   fixturer>   rp   r   r   r   r   r   r   r   r   r   r   r<   r<   r<   r=   <module>   s>   
2
>
G
G
8


:::