o
    i"                     @   s  d dl Z d dlZd dlZd dlmZ d dlZd dlZd dlZd dlZd dl	m
Z
 d dlmZ d dlmZ d dlmZmZ d dlmZmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
l m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z) d dl*m+Z+ d dl,m-Z- d dl.m/Z/ d dl0m1Z1 d dl2m3Z3 d dl4m5Z5 d dl6m7Z7 d dl8m9Z9 zd dl:Z:W n   dZ:Y ej;ddddefddZ<dd Z;e=dkre<  dS dS )    N)BytesIO)nullcontext)
DictConfig	OmegaConf)autocast
GradScaler)DistributedDataParallel)FullyShardedDataParallel)Join)ShardedGradScaler)average_checkpoints)tables)optim_classes)Trainer)scheduler_classes)
initialize)download_model)mark_only_lora_as_trainable)set_all_random_seed)load_pretrained_model)prepare_model_dir)model_summary)	AutoModel)config_nameversion_basekwargsc                 C   st   |  ddrdd l}|  d| v sJ d| vr1td|  dd tdd	|  d	d
i| } tdi |  d S )NdebugFr   model
model_confz"download models from model hub: {}hubmsis_trainingT )getpdb	set_tracelogginginfoformatr   main)r   r$   r"   r"   G/home/ubuntu/.local/lib/python3.10/site-packages/funasr/bin/train_ds.py
main_hydra,   s   r+   c            !      K   s  t | dd | dtjjjtjj_| dtjjjtjj_| ddtjj_| ddtjjj	_
ttjdd}ttjd	d}ttjd
d}|dkrVt  |dk}| dd}| dd}|r{td|  tj| ddd n |s|rtd| d|  tj| dddd tj| td | dd}d| d< tdHi | }|dkrtdHi |  |j} || d< | d }	| d }
|j}| d= | dd }|d urd|v rt|}t|ttfs|f}td | |D ]$}| D ]\}}| |d! s||krtd"| d# d|_!qq|dkr(tt"|  t#dH|||||| d | d$d | d%d&d'| d(}|j$|fi | }ttjd	d| d< ttjd	d|_%|j&|fi | \}}}td) tj'| d* d+d,}|dHi | }|j(rt)dd-nd }|j*rt+|j(d-n|}|j,||||d. | d(i d/d}t-d0}d}d1\}}t.|j/|j0D ]#}t12 }t.|j3|j4D ]}t12 }|j5|||j6d2\}}|j7|||||||||j4|j6d3
 d|_6t8|9 j%}|j:dkrtj%| tj;  W d    n	1 sw   Y  t12 | d4 }td5| d6|d7d8|j4 d9|j4|  d:|j4| | d7d;|j0|  d<|j0| d |j4 |j4 | | d7d= qd|_3|j<|||d d> |j=}||k rtd?| d@|  |}d}n|d7 }tdA| dB| dC |dkr||krtdD|d    n;d|_>|j?|d ||||d. t12 } | | d4 }td5| d6|d7d8|j0 dE|j0| | d7d=	 dF|_@dF|_Aq|jBdkrtC|jD|jE|jFdG |G  d S )INseedr   cudnn_enabledcudnn_benchmarkcudnn_deterministicTenable_tf32RANK
LOCAL_RANK
WORLD_SIZE   use_fsdpFuse_deepspeedzuse_deepspeed: backendnccl)dist_backendz	use_ddp: z, use_fsdp: zenv://)r7   init_methodz Build model, frontend, tokenizerdevicecudacpu	tokenizerfrontendr   freeze_param,zfreeze_param is not None: %s.zSetting z.requires_grad = Falseexcludes
output_dirz./exp)rank
local_rank
world_sizeuse_ddpr5   r;   rC   rD   
train_confzBuild dataloaderdataset_conf
dataloaderDataloaderMapStyle)enabled)r   optim	schedulerscalerearly_stopping_patienceinf)NN)data_split_i
start_step)
r   rN   rO   rP   dataloader_traindataloader_valepochrS   data_split_numrT   g      @z

rank: z, time_escaped_epoch: z.3fz hours, estimated to finish z data_slices, remaining: z	 slices, z hours, epoch: z	 epochs, z hours
)r   rV   rW   zcurrent_val: z, best_val_loss: zNo val_loss improvement for /z epochsz"Early stopping triggered at epoch z epoch: g        )r6   r"   )Hr   r#   torchbackendscudnnrM   	benchmarkdeterministicr<   matmul
allow_tf32intosenvironr   printr&   r'   	deepspeedinit_distributeddistinit_process_group
set_devicer   r   r   r   eval
isinstancelisttuplenamed_parameters
startswithrequires_gradr   r   
warp_modelr;   warp_optim_schedulerdataloader_classesuse_fp16r   r5   r   resume_checkpointfloatrangestart_epoch	max_epochtimeperf_counterstart_data_split_irX   
build_iterrT   train_epochnext
parameterstypeempty_cachevalidate_epochval_loss_avgstep_in_epochsave_checkpointtrain_acc_avgtrain_loss_avgrE   r   rD   avg_nbest_modelr6   close)!r   rE   rF   rG   rH   r5   r6   r;   r   r>   r?   r@   tkptrainerrN   rO   dataloader_classrK   rP   rQ   best_val_lossepochs_no_improvedataloader_trrV   rW   time1rS   time_slice_itime_escapedcurrent_valtime2r"   r"   r*   r)   ;   s>  





	

"

r)   __main__)>rb   sysrZ   torch.nnnnhydrar&   rz   argparseior   
contextlibr   torch.distributeddistributedrg   	omegaconfr   r   torch.cuda.ampr   r   torch.nn.parallelr   DDPtorch.distributed.fsdpr	   FSDP!torch.distributed.algorithms.joinr
   *torch.distributed.fsdp.sharded_grad_scalerr   'funasr.train_utils.average_nbest_modelsr   funasr.registerr   funasr.optimizersr   funasr.train_utils.trainer_dsr   funasr.schedulersr   funasr.train_utils.initializer   'funasr.download.download_model_from_hubr   funasr.models.lora.utilsr   &funasr.train_utils.set_all_random_seedr   (funasr.train_utils.load_pretrained_modelr   funasr.utils.miscr    funasr.train_utils.model_summaryr   funasrr   re   r)   r+   __name__r"   r"   r"   r*   <module>   sR    9
