o
    ॵiU                     @   sF  d dl Z d dlZd dlmZmZmZ d dlZd dlZ	d dl
m  mZ d dlZd dlm  mZ d dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lm Z  d d
l!m"Z" d dl#m$Z$m%Z% d dl&m'Z' d dl(m)Z) d dl*m+Z+m,Z,m-Z- dZ.e) Z/e"j0ej1dG dd de Z2G dd de	j3Z4dS )    N)DictOptionalUnion)autocast)Dataset)tqdm)Trainers)Model
TorchModel)	MsDataset)BaseTrainer)TRAINERS)DEFAULT_MODEL_REVISION	ModelFile)create_device)
get_logger)get_dist_infoget_local_rank	init_distsi-snr)module_namec                   @   s   e Zd ZdZdddefdededee deeee	f  deeee	f  dee fd	d
Z
dejjfddZdd Zdedeeef fddZdS )SeparationTraineraa  A trainer is used for speech separation.

    Args:
        model: id or local path of the model
        work_dir: local path to store all training outputs
        cfg_file: config file of the model
        train_dataset: dataset for training
        eval_dataset: dataset for evaluation
        model_revision: the git version of model on modelhub
    Nmodelwork_dircfg_filetrain_dataseteval_datasetmodel_revisionc              	   K   sj  t |tr| ||| _|d u rtj| jtj}n|d us"J dtj	|| _t
| | |  | _|| _|dd d urEt|d  t \}}	|	dk| _|dd}
| jr`t }d| }
t|
| _d|vr{t| jjdstJ d| jjj| _n|d | _|| _|| _tj| jd	}| j| jjj| jjjj| jjjj | jjjj!| jjj"j#| jjj"j$| jjj"j%d
}ddl&m'} t(|}|||d| _)W d    n1 sw   Y  t*j+| j||d ddddddd}| jj,dkr| jj, d| jj- |d< t*j.j/0| j| _1| j1| j)d< | j)d 2d| j1i | j3 }| j)d 2| t4|| j)d | j)|| j)d d| _5d S )Nz?Config file should not be None if model is not from pretrained!launcher   devicegpuzcuda:
max_epochsz1max_epochs is missing from the configuration filezhparams.yaml)output_folderseedlrweight_decayclip_grad_normfactorpatiencedont_halve_until_epochr   )load_hyperpyyaml)	overrides)experiment_directoryhyperparams_to_saver,   Fcpunccl)debugr    data_parallel_backenddistributed_launchdistributed_backendfind_unused_parameterscuda:epoch_countercheckpointercounter	optimizer)modules	opt_classhparamsrun_optsr9   )6
isinstancestrget_or_download_model_dir	model_dirospathjoinr   CONFIGURATIONdirnamer   __init__build_modelr   r   getr   r   _distr   r   r    hasattrcfgtrainr"   _max_epochsr   r   r$   r;   r%   r&   r'   lr_schedulerr(   r)   r*   hyperpyyamlr+   openr>   sbcreate_experiment_directorytypeindexutils
epoch_loopEpochCounterr8   add_recoverablesas_dict
Separation	separator)selfr   r   r   r   r   r   kwargs_
world_sizedevice_name
local_rankhparams_filer,   r+   finr?   r<    rg   `/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/trainers/audio/separation_trainer.pyrI   .   s   
	















zSeparationTrainer.__init__returnc                 C   sD   t j| j| jdd}t|trt|dr|jS t|tj	j
r |S dS )z1 Instantiate a pytorch model and return.
        T)cfg_dicttrainingr   N)r	   from_pretrainedrC   rN   r@   r
   rM   r   torchnnModule)r_   r   rg   rg   rh   rJ      s   
zSeparationTrainer.build_modelc                 O   s,   | j j| j| j| j| jd | jd d d S )Ndataloader_opts)train_loader_kwargsvalid_loader_kwargs)r^   fitr8   r   r   r>   )r_   argsr`   rg   rg   rh   rO      s   
zSeparationTrainer.traincheckpoint_pathc                 O   sB   |r|| j j_n| jj| jd | jj| j| j d t	d}t	|iS )N)r    rp   )test_loader_kwargsmin_key)
r>   r9   checkpoints_dirr   load_check_pointr    r^   evaluater   EVAL_KEY)r_   ru   rt   r`   valuerg   rg   rh   rz      s   zSeparationTrainer.evaluate)__name__
__module____qualname____doc__r   rA   r   r   r   r   rI   rm   rn   ro   rJ   rO   r   floatrz   rg   rg   rg   rh   r   !   s0    
c
	
r   c                   @   sb   e Zd ZdZdddZdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd ZdS )r]   z:A subclass of speechbrain.Brain implements training steps.Nc                    s  |\}}|  j|  j}}tjfddt jjD dd  j|tjj	krmt
 4  jjs: jjrG |\}d} jjrR j||} jjr^ |\}W d   n1 shw   Y   jd |} jd |}t|g jj }|| tj fddt jjD dd}|d	}	|d	}
|	|
krt|d
d
d
|	|
 f}|fS |ddd|	ddf }|fS )z?Forward computations from the mixture to the separated signals.c                    s   g | ]} | d   dqS )r   )	unsqueeze.0i)targetsrg   rh   
<listcomp>   s    z.Separation.compute_forward.<locals>.<listcomp>r   dimNencodermasknetc                    s$   g | ]} j d  | dqS )decoderr   )r<   r   r   )r_   sep_hrg   rh   r      s    r   r   )tor    rm   catranger>   num_spksrT   StageTRAINno_graduse_speedperturbuse_rand_shiftadd_speed_perturbsumuse_wavedropwavedroplimit_training_signal_lencut_signalsr<   stacksizeFpad)r_   mixr   stagenoisemix_lensmix_west_mask
est_sourceT_originT_estrg   )r_   r   r   rh   compute_forward   sL   

	




zSeparation.compute_forwardc                 C   s   | j ||S )zComputes the sinr loss)r>   loss)r_   predictionsr   rg   rg   rh   compute_objectives   s   zSeparation.compute_objectivesc                 C   sf  |j }|j|jg}| jjdkr||j | jrt ; | 	||t
jj\}}| ||}| jjrK| jj}|||k }| dkrF| }n	td n| }W d   n1 sYw   Y  || jjk r| dkr| j|  | jjdkr| j| j tjj| j | jj | j | j | j!  n|  j"d7  _"t#$d%| j" t&d'| j(|_)nt| 	||t
jj\}}| ||}| jjr| jj}|||k }| dkr| }n| }|| jjk r| dkr|  | jjdkrtjj| j | jj | j   n|  j"d7  _"t#$d%| j" t&d'| j(|_)| j*  |+ , S )zTrains one batch   r   zloss has zero elements!!Nr   zNinfinite loss or empty loss! it happened {} times so far - skipping this batch)-mix_sigs1_sigs2_sigr>   r   appends3_sigauto_mix_precr   r   rT   r   r   r   threshold_byloss	thresholdnelementmeanprintloss_upper_limscalerscalebackwardr'   unscale_r;   rm   rn   rX   clip_grad_norm_r<   
parametersstepupdatenonfinite_countloggerinfoformattensorr   r    data	zero_graddetachr/   )r_   batchmixturer   r   r   thloss_to_keeprg   rg   rh   	fit_batch   sz   




zSeparation.fit_batchc                 C   s   |j }|j}|j|jg}| jjdkr||j t	  | 
|||\}}| ||}W d   n1 s6w   Y  |tjjkrn| jjrnt| jdrd| jjdkrc| |d ||| | j jd7  _n
| |d ||| |  S )z/Computations needed for validation/test batchesr   Nn_audio_to_saver   r   )idr   r   r   r>   r   r   r   rm   r   r   r   rT   r   TEST
save_audiorM   r   r   r   )r_   r   r   snt_idr   r   r   r   rg   rg   rh   evaluate_batch5  s&   
zSeparation.evaluate_batchc                 C   s   d|i}|t jjkr|| _|t jjkrVt| jjtj	r/| j| j
g||\}}t| j
| n
| jj
jjd d }| jjj||d| j|d | jjd|d idgd dS dS )z"Gets called at the end of a epoch.r   r   r%   )epochr%   )
stats_metatrain_statsvalid_stats)metamin_keysN)rT   r   r   r   VALIDr@   r>   rQ   
schedulersReduceLROnPlateaur;   update_learning_rateoptimparam_groupstrain_logger	log_statsr9   save_and_keep_only)r_   r   
stage_lossr   stage_stats
current_lrnext_lrrg   rg   rh   on_stage_endM  s0   


zSeparation.on_stage_endc           
      C   sh  d}d}| j jrg }d}t|jd D ]-}| j |dddd|f |}|| |dkr4|jd }q|jd |k r@|jd }q| j jrud}t|jd D ]&}t| j j	| j j
d}|| | j||< tj|| |d fdd||< qN|r| j jrtj|jd ||jd |jtjd	}t|D ]\}}|| ddd|f |dddd|f< q|d}	|	|fS )
z=Adds speed perturbation and random_shift to the input signalsr   FTNr   r   r   )shiftsdims)r    dtype)r>   r   r   shapespeedperturbr   r   rm   randint	min_shift	max_shiftr   r    rollzerosr   	enumerater   )
r_   r   	targ_lensmin_len	recombinenew_targetsr   
new_target
rand_shiftr   rg   rg   rh   r   m  sL   


,
zSeparation.add_speed_perturbc                 C   sp   t ddtd|jd | jj  d }|dd||| jj ddf }|dd||| jj f }||fS )zThis function selects a random segment of a given length within the mixture.
        The corresponding targets are selected accordinglyr   r   r   N)rm   r   maxr   r>   training_signal_lenitem)r_   r   r   	randstartrg   rg   rh   r     s    
zSeparation.cut_signalsc                 C   s6   t |dr	|  | D ]}||kr| | qdS )z3Reinitializes the parameters of the neural networksreset_parametersN)rM   r  r<   reset_layer_recursively)r_   layerchild_layerrg   rg   rh   r    s   

z"Separation.reset_layer_recursivelyc                 C   s*  ddl m} tj| jjd}g }g }g }g }g d}tjj	j
|fi | jj}	t|d(}
tj|
|d}|  t|	dd}t|D ]\}}|j\}}|j}|j|jg}| jjd	krd||j t  | |j|tjj\}}W d
   n1 sw   Y  | ||}tj|g| jj dd}| |j!}| ||}|" |"  }||d # $ % |d # & $ % \}}}}||d # $ % |d # & $ % \}}}}|" |"  }|d |" ||'  |'  d}|(| ||"  ||"  ||'   ||'   qFdt)*|" t)*|" t)*|" t)*|" d}|(| W d
   n	1 sJw   Y  W d
   n	1 sZw   Y  t+,d-t)*|"  t+,d-t)*|"  t+,d-t)*|"  t+,d-t)*|"  d
S )zVThis script computes the SDR and SI-SNR metrics and saves
        them into a csv filer   )bss_eval_sourcesztest_results.csv)r   sdrsdr_ir   zsi-snr_iw)
fieldnamesT)dynamic_ncolsr   Nr   r   avgzMean SISNR is {}zMean SISNRi is {}zMean SDR is {}zMean SDRi is {}).mir_eval.separationr  rD   rE   rF   r>   r#   rT   dataio
dataloadermake_dataloaderrp   rS   csv
DictWriterwriteheaderr   r   r   r   r   r   r   r   r   rm   r   r   r   r   r   r   r   r    r   tr/   numpyr   r   writerownparrayr   r   r   )r_   	test_datar  	save_fileall_sdrs
all_sdrs_i
all_sisnrsall_sisnrs_icsv_columnstest_loaderresults_csvwriterr  r   r   r   mix_lenr   r   r   sisnrmixture_signalsisnr_baselinesisnr_ir  ra   sdr_baseliner  rowrg   rg   rh   save_results  s   



DzSeparation.save_resultsc           	   	   C   sN  t j| jjd}t j|st | t| jjD ]^}|ddd|f }||	 
  d }t j|d||d }t||d | jj |ddd|f }||	 
  d }t j|d||d }t||d | jj q|d dddf }||	 
  d }t j|d|}t||d | jj dS )	zFsaves the test audio (mixture, targets, and estimated sources) on diskaudio_resultsr   Ng      ?zitem{}_source{}hat.wavr   zitem{}_source{}.wavzitem{}_mix.wav)rD   rE   rF   r>   save_folderexistsmkdirr   r   absr   r   
torchaudiosaver   r/   sample_rate)	r_   r   r   r   r   	save_pathnssignalr  rg   rg   rh   r     s8   
zSeparation.save_audio)N)r}   r~   r   r   r   r   r   r   r   r   r   r  r)  r   rg   rg   rg   rh   r]      s    
6F /^r]   )5r  rD   typingr   r   r   r  r  speechbrainrT   speechbrain.nnet.schedulersnnetr   rm   torch.nn.functionalrn   
functionalr   r/  torch.cuda.ampr   torch.utils.datar   r   modelscope.metainfor   modelscope.modelsr	   r
   modelscope.msdatasetsr   modelscope.trainers.baser   modelscope.trainers.builderr   modelscope.utils.constantr   r   modelscope.utils.devicer   modelscope.utils.loggerr   modelscope.utils.torch_utilsr   r   r   r{   r   register_modulespeech_separationr   Brainr]   rg   rg   rg   rh   <module>   s6    