import math
import os
import shutil
from functools import partialmethod

import deepspeed
import torch
from deepspeed import DeepSpeedEngine
from megatron_util import mpu, print_rank_0
from transformers.deepspeed import HfTrainerDeepSpeedConfig

from modelscope.metainfo import Hooks
from modelscope.trainers.hooks import LoadCheckpointHook
from modelscope.trainers.hooks.builder import HOOKS
from modelscope.trainers.hooks.checkpoint.checkpoint_hook import (
    BestCkptSaverHook, CheckpointHook)
from modelscope.trainers.hooks.checkpoint.checkpoint_processor import \
    CheckpointProcessor
from modelscope.trainers.hooks.hook import Hook
from modelscope.trainers.hooks.lr_scheduler_hook import (LrSchedulerHook,
                                                         LrSchedulerProcessor)
from modelscope.trainers.hooks.optimizer.base import (OptimizerHook,
                                                      OptimizerProcessor)
from modelscope.trainers.hooks.priority import Priority
from modelscope.utils.checkpoint import save_checkpoint
from modelscope.utils.constant import DistributedParallelType
from modelscope.utils.device import create_device
from modelscope.utils.logger import get_logger
from modelscope.utils.torch_utils import (get_dist_info, get_local_rank,
                                          init_dist)

logger = get_logger()


class DeepSpeedConfig(HfTrainerDeepSpeedConfig):
    """
    The `DeepSpeedConfig` object is meant to be created during `TrainingArguments` object creation and has the
    same lifespan as the latter.
    """

    def is_auto(self, ds_key_long):
        val = self.get_value(ds_key_long)
        if val is None:
            return False
        return val == 'auto'

    def trainer_config_finalize(self, args, model, num_training_steps):
        """
        This stage runs after we have the model and know num_training_steps.

        Now we can complete the configuration process.
        """
        # deal with `auto` entries that depend on the model's hidden size
        hidden_size_based_keys = [
            'zero_optimization.reduce_bucket_size',
            'zero_optimization.stage3_prefetch_bucket_size',
            'zero_optimization.stage3_param_persistence_threshold',
        ]
        hidden_size_auto_keys = [
            x for x in hidden_size_based_keys if self.is_auto(x)
        ]

        if len(hidden_size_auto_keys) > 0:
            if hasattr(model.config, 'hidden_size'):
                hidden_size = model.config.hidden_size
            elif hasattr(model.config, 'hidden_sizes'):
                # if there are many hidden sizes pick the largest one
                hidden_size = max(model.config.hidden_sizes)
            else:
                raise ValueError(
                    "The model's config file has neither `hidden_size` nor "
                    "`hidden_sizes` entry, therefore it's not possible to "
                    'automatically fill out the following `auto` entries in '
                    f'the DeepSpeed config file: {hidden_size_auto_keys}. '
                    'You can fix that by replacing `auto` values for these '
                    'keys with an integer value of your choice.')

            self.fill_only('zero_optimization.reduce_bucket_size',
                           hidden_size * hidden_size)
            if self.is_zero3():
                # automatically assign the optimal config values based on
                # the model config
                self.fill_only(
                    'zero_optimization.stage3_prefetch_bucket_size',
                    0.9 * hidden_size * hidden_size)
                self.fill_only(
                    'zero_optimization.stage3_param_persistence_threshold',
                    10 * hidden_size)

        # scheduler: fill the step counts that are only known at this point
        options = args.train.optimizer.get('options', {})
        warmup = options.get('warmup', {})
        warmup_steps = warmup.get('warmup_steps', 0)
        warmup_ratio = warmup.get('warmup_ratio', 0.0)
        warmup_steps = warmup_steps if warmup_steps > 0 else math.ceil(
            num_training_steps * warmup_ratio)
        self.fill_match('scheduler.params.total_num_steps',
                        num_training_steps)
        self.fill_match('scheduler.params.warmup_num_steps', warmup_steps)

        if len(self.mismatches) > 0:
            mismatches = '\n'.join(self.mismatches)
            raise ValueError(
                'Please correct the following DeepSpeed config values that '
                f'mismatch TrainingArguments values:\n{mismatches}\n'
                "The easiest method is to set these DeepSpeed config values "
                "to 'auto'.")
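# Illustrative only (not part of the original module): the kind of DeepSpeed
# json fragment `DeepSpeedConfig.trainer_config_finalize` is meant to finish.
# Each "auto" below is resolved at runtime -- the bucket sizes from the
# model's hidden_size, the step counts from the computed training schedule;
# "WarmupDecayLR" is just one DeepSpeed scheduler that accepts both params:
#
#     {
#         "zero_optimization": {
#             "stage": 3,
#             "reduce_bucket_size": "auto",
#             "stage3_prefetch_bucket_size": "auto",
#             "stage3_param_persistence_threshold": "auto"
#         },
#         "scheduler": {
#             "type": "WarmupDecayLR",
#             "params": {
#                 "total_num_steps": "auto",
#                 "warmup_num_steps": "auto"
#             }
#         }
#     }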
__module____qualname____doc__r"   rH   r    r    r    r!   r   #   s    r   c                 C   sL   |j }d }d|vr| rtd | j}d|d< d }d|vr"| j}||fS )Nr<   zDetected ZeRO Offload and non-DeepSpeed optimizers: This combination should work as long as the custom optimizer has both CPU and GPU implementation (except LAMB)Tzero_allow_untested_optimizer	scheduler)r6   
is_offloadloggerinfor<   rN   )trainerhf_deepspeed_configrE   r6   r<   lr_schedulerr    r    r!   deepspeed_optim_schedl   s   rU   c                   @   sh   e Zd ZdZdd ZdddZ		ddd	Zd
d Zdd Zdd Z	dd Z
dd Zdd Zdd ZdS )DeepspeedProcessorrD   c              	   C   sD   zt  }|dkrW dS t  }d|W S  ttfy!   Y dS w )N    z_mp_rank_{:02d})r   $get_tensor_model_parallel_world_sizeget_tensor_model_parallel_rankformatImportErrorAssertionError)r   tp_world_sizemp_rankr    r    r!   	rank_name   s   zDeepspeedProcessor.rank_nameTc                 C   s&   |sdS t  }d|}d| dS )Nzpytorch_model.binz{:02d}mp_rank_z_model_states.pt)r   rZ   r[   )r   with_mpur_   rankr    r    r!   get_bin_filename   s
   
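    # For example, get_bin_file(with_mpu=False) yields 'pytorch_model.bin',
    # while tensor-parallel rank 3 yields 'mp_rank_03_model_states.pt' -- the
    # file name DeepSpeed itself uses for per-rank model states.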
    def save_checkpoints(self,
                         trainer,
                         checkpoint_path_prefix,
                         output_dir,
                         meta=None,
                         save_optimizers=True):
        model = trainer.unwrap_module(trainer.model)
        _train_state_file = checkpoint_path_prefix + self.rank_name(
        ) + CheckpointProcessor.TRAINER_STATE_SUFFIX
        # save the trainer state file without the model's state_dict
        save_checkpoint(
            model, _train_state_file, None, None, meta=meta, with_model=False)

        save_dir = os.path.dirname(checkpoint_path_prefix)
        prefix = os.path.basename(checkpoint_path_prefix)
        with_mpu = not mpu.is_unitialized()
        bin_file = self.get_bin_file(with_mpu)
        src_file = os.path.join(checkpoint_path_prefix, bin_file)
        if self.zero_stage == 3 or with_mpu:
            # let the DeepSpeed engine write its (possibly sharded) states
            trainer.model.save_checkpoint(save_dir, prefix)
        else:
            save_checkpoint(
                model, src_file, None, None, meta=None, with_meta=False)

        if self.zero_stage == 3:
            # ZeRO-3 weights are partitioned; there is no single bin file
            return

        if with_mpu:
            src_file = os.path.join(checkpoint_path_prefix,
                                    self._BIN_FILE_DIR, bin_file)
        else:
            src_file = os.path.join(checkpoint_path_prefix, bin_file)

        dest_file = os.path.join(output_dir, bin_file)
        if os.path.isfile(dest_file):
            os.unlink(dest_file)

        try:
            os.link(src_file, dest_file)
        except OSError as e:
            logger.error(
                f'Link {src_file} to {dest_file} error: {e}, changing to '
                'copy the bin file, this may cause more space usage.')
            shutil.copyfile(src_file, dest_file)
    def remove_checkpoints(self, trainer, checkpoint_path_prefix):
        _train_state_file = checkpoint_path_prefix + self.rank_name(
        ) + CheckpointProcessor.TRAINER_STATE_SUFFIX
        if os.path.isfile(_train_state_file):
            os.remove(_train_state_file)
        shutil.rmtree(checkpoint_path_prefix, ignore_errors=True)
    def load_checkpoints(self, checkpoint_path_prefix, trainer,
                         load_all_state, strict):
        assert os.path.isdir(checkpoint_path_prefix)
        path = os.path.dirname(checkpoint_path_prefix)
        tag = os.path.basename(checkpoint_path_prefix)

        meta = {}
        _train_state_file = checkpoint_path_prefix + self.rank_name(
        ) + CheckpointProcessor.TRAINER_STATE_SUFFIX
        if os.path.isfile(_train_state_file):
            meta = self.load_trainer_state(trainer, _train_state_file)

        if isinstance(trainer.model, DeepSpeedEngine):
            # the engine is initialized: let DeepSpeed restore the module
            # weights and, optionally, the optimizer/scheduler states
            trainer.model.load_checkpoint(
                path,
                tag,
                load_module_strict=strict,
                load_module_only=not load_all_state)
        else:
            # engine not built (e.g. evaluation): read the bin file directly
            save_dir = checkpoint_path_prefix
            bin_file = self.get_bin_file()
            model_file = os.path.join(save_dir, bin_file)
            checkpoint = torch.load(
                model_file, map_location=lambda storage, loc: storage)
            checkpoint = checkpoint['module']
            model_dict = trainer.unwrap_module(trainer.model).state_dict()
            for key in checkpoint:
                if key not in model_dict.keys():
                    print_rank_0(f'Skip key: {key}')
                else:
                    print_rank_0(f'Loading key: {key}')
            trainer.unwrap_module(trainer.model).load_state_dict(
                checkpoint, strict=strict)
        return meta
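    # Note on the save/load pairing above: with ZeRO stage 3 or tensor
    # parallelism the DeepSpeed engine owns the (sharded) model states and
    # both paths go through engine save_checkpoint/load_checkpoint; otherwise
    # a single bin file is written and read back with a plain torch.load.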
    def backward(self, trainer, loss_keys, cumulative_iters, grad_clip):
        # Gradient accumulation, loss scaling and clipping are all handled
        # inside the DeepSpeed engine, so only backward() and step() are
        # forwarded here; cumulative_iters/grad_clip come from the ds config.
        for k in loss_keys:
            loss = trainer.train_outputs[k]
            trainer.model.backward(loss)
        trainer.model.step()

    def initialize_optimizer(self, trainer):
        pass

    def step(self, trainer):
        pass

    def should_save_on_rank(self, trainer):
        return True

    def get_current_lr(self, trainer):
        if isinstance(trainer.optimizer, torch.optim.Optimizer) or \
                isinstance(trainer.optimizer, deepspeed.DeepSpeedOptimizer):
            lr = [group['lr'] for group in trainer.optimizer.param_groups]
        elif isinstance(trainer.optimizer, dict):
            lr = dict()
            for name, optim in trainer.optimizer.items():
                lr[name] = [group['lr'] for group in optim.param_groups]
        else:
            raise RuntimeError(
                'lr is not applicable because optimizer does not exist.')
        return lr
*'rV   )module_namec                   @   sV   e Zd ZejZ					dddZdd Zdd	 Zd
d Z	dd Z
dd Zdd ZdS )DeepspeedHookNTFc                 C   s:   || _ || _|| _|| _|d ur|dv sJ d|| _d S )N)r   rW      rg   z zero_stage must in (0, 1, 2, 3)!)save_zero_checkpoint"deepspeed_activation_checkpointingrb   deepspeed_configrp   )r   r6   r   r   rb   rp   r    r    r!   __init__  s   

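    # A minimal usage sketch (assuming the standard ModelScope trainer config
    # layout; the 'DeepspeedHook' type string mirrors Hooks.DeepspeedHook and
    # the keys mirror this __init__'s parameters) -- the hook is normally
    # enabled from the trainer cfg rather than constructed by hand:
    #
    #     cfg.train.hooks.append({
    #         'type': 'DeepspeedHook',
    #         'config': 'ds_config.json',  # looked up in model_dir if the
    #                                      # path does not exist as given
    #         'save_zero_checkpoint': True,
    #         'with_mpu': False,
    #     })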
    def register_processor(self, trainer):
        processor = DeepspeedProcessor()
        optimizer_hook = trainer.get_hook(OptimizerHook)
        if len(optimizer_hook) > 0 and not isinstance(
                optimizer_hook[0].processor, DeepspeedProcessor):
            optimizer_hook[0].set_processor(processor)

        ckpt_hook = trainer.get_hook(CheckpointHook)
        if len(ckpt_hook) > 0 and not isinstance(ckpt_hook[0].processor,
                                                 DeepspeedProcessor):
            ckpt_hook[0].set_processor(processor)

        best_ckpt_hook = trainer.get_hook(BestCkptSaverHook)
        if len(best_ckpt_hook) > 0 and not isinstance(
                best_ckpt_hook[0].processor, DeepspeedProcessor):
            best_ckpt_hook[0].set_processor(processor)

        load_ckpt_hook = trainer.get_hook(LoadCheckpointHook)
        if len(load_ckpt_hook) > 0 and not isinstance(
                load_ckpt_hook[0].processor, DeepspeedProcessor):
            load_ckpt_hook[0].set_processor(processor)

        lr_scheduler_hook = trainer.get_hook(LrSchedulerHook)
        if len(lr_scheduler_hook) > 0 and not isinstance(
                lr_scheduler_hook[0].processor, DeepspeedProcessor):
            lr_scheduler_hook[0].set_processor(processor)

        self.processor = processor

    def prepare_args(self, args):
        # map ModelScope cfg entries onto the TrainingArguments-style names
        # that HfTrainerDeepSpeedConfig.trainer_config_process expects
        args.per_device_train_batch_size = args.train.dataloader.get(
            'batch_size_per_gpu', 4)
        args.max_grad_norm = args.train.get('clip_grad', 1.0)
        args.learning_rate = args.train.optimizer.get('lr', 1e-5)
        args.adam_beta1 = args.train.optimizer.get('adam_beta1', 0.9)
        args.adam_beta2 = args.train.optimizer.get('adam_beta2', 0.999)
        args.adam_epsilon = args.train.optimizer.get('adam_epsilon', 1e-8)
        args.weight_decay = args.train.optimizer.get('weight_decay', 0.0)
        args.fp16 = args.train.get('use_fp16', False)
        args.fp16_full_eval = args.train.get('use_fp16', False)
        args.fp16_backend = args.train.get('fp16_backend', 'amp')
        args.save_on_each_node = args.train.get('save_on_each_node', False)
        args.fp16_opt_level = args.train.get('fp16_opt_level', None)
        args.fp16_opt_level = next(
            (item.get('opt_level', args.fp16_opt_level)
             for item in args.train.hooks
             if item['type'] == 'ApexAMPOptimizerHook'), args.fp16_opt_level)
        if not args.fp16_opt_level:
            args.fp16_opt_level = 'O1'
        args.bf16 = args.train.get('bf16', False)
    def get_deepspeed_config(self, trainer, args, max_steps):
        _, args.world_size = get_dist_info()
        self.prepare_args(args)
        if os.path.exists(self.deepspeed_config):
            deepspeed_config = self.deepspeed_config
        else:
            deepspeed_config = os.path.join(trainer.model_dir,
                                            self.deepspeed_config)
            if not os.path.exists(deepspeed_config):
                raise RuntimeError(
                    'No such DeepSpeed json config file: '
                    f'{self.deepspeed_config}.')
        logger.info(f'Loading deepspeed config from {deepspeed_config}')
        ds_config = DeepSpeedConfig(deepspeed_config)
        ds_config.trainer_config_process(args)
        ds_config.trainer_config_finalize(args, trainer.model, max_steps)
        return ds_config
    def after_init(self, trainer):
        init_dist('pytorch')
        local_rank = get_local_rank()
        trainer.device = create_device(f'cuda:{local_rank}')
        trainer.model.to(trainer.device)
        trainer.parallel_groups[DistributedParallelType.DP] = None

    def before_val(self, trainer):
        pass
    def before_run(self, trainer):
        if not hasattr(trainer, 'logger'):
            self.logger = get_logger()
        else:
            self.logger = trainer.logger

        args = trainer.cfg
        args.gradient_accumulation_steps = args.train.optimizer.get(
            'options', {}).get('cumulative_iters', 1)
        num_update_steps_per_epoch = trainer.iters_per_epoch // \
            args.gradient_accumulation_steps
        max_steps = math.ceil(
            trainer._max_epochs * num_update_steps_per_epoch)

        ds_config = self.get_deepspeed_config(trainer, args, max_steps)
        optimizer, lr_scheduler = deepspeed_optim_sched(
            trainer, ds_config, args)

        config = ds_config.config
        if self.zero_stage is not None:
            config['zero_optimization']['stage'] = self.zero_stage
        self.processor.zero_stage = config.get('zero_optimization',
                                               {}).get('stage', 0)

        # deepspeed.initialize returns (engine, optimizer, dataloader,
        # lr_scheduler); the engine replaces the raw model on the trainer
        trainer.model, trainer.optimizer, _, trainer.lr_scheduler = \
            deepspeed.initialize(
                model=trainer.model,
                optimizer=optimizer,
                config=config,
                lr_scheduler=lr_scheduler)