o
    ॵi                     @   sF   d dl Z d dlZ			dddZdd Zddd	Z			dd
dZdS )    NTc                 C   sD   t | |||||d\}}|r|  r|dur| j|||d ||fS )a'  Load training checkpoint

    Arguments:
        load_dir: Required. Directory to load the checkpoint from
        tag: Required. Checkpoint tag used as a unique identifier for the checkpoint. Ex. Global Step.
        load_module_strict: Optional. Boolean to strictly enforce that the keys in state_dict of module and
         checkpoint match.
        load_optimizer_states: Optional. Boolean to load the training optimizer states from Checkpoint.
         Ex. ADAM's momentum and variance
        load_lr_scheduler_states: Optional. Boolean to add the learning rate scheduler states from Checkpoint.
    Return:
        load_path: Path of the loaded checkpoint. None if loading the checkpoint failed
        client_state: State dictionary used for loading required training states in the client code.
    )load_module_strictload_optimizer_statesload_lr_scheduler_statesNr   )_load_checkpointzero_optimization_load_zero_checkpoint)modelload_dirtagr   r   r   	load_pathclient_states r   X/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/utils/nlp/load_checkpoint.pyload_checkpoint   s   
r   c                 C   s"   t j|t|d| d }|S )Nzmp_rank_{:02d}z_model_states.pt)ospathjoinstrformat)mp_rankcheckpoints_pathr   	ckpt_namer   r   r   _get_ckpt_name:   s
   r    c                 C   s2   t | ||}tj|dd d}d|v r|d S |S )Nc                 S      | S Nr   storagelocr   r   r   <lambda>D       zpre_load.<locals>.<lambda>map_locationmodule)r   torchload)r   r
   r   r   
checkpointr   r   r   pre_loadA   s
   r(   c           	         s  |  ||}tj|sdS tj|dd d}| j|d |d |  s?|r?|  r5| j	j
|d |d n
|r?| j	
|d  |rN| jd urN| j
|d	  |d
 | _|d | _|d| j|   | _|d | _|d | _|d | _g d  fdd| D }||fS )N)NNc                 S   r   r   r   r   r   r   r   r    U   r!   z"_load_checkpoint.<locals>.<lambda>r"   r$   )
state_dictstrict	optimizerr   lr_schedulercsr_tensor_module_namesglobal_stepsglobal_samplesskipped_stepsmp_world_sizedp_world_size)r$   r+   r,   r-   r0   r.   r2   r1   c                    s   i | ]\}}| vr||qS r   r   ).0keyvaluedeepspeed_statesr   r   
<dictcomp>o   s    z$_load_checkpoint.<locals>.<dictcomp>)r   r   r   existsr%   r&   load_module_state_dictr   fp16_enabledr+   load_state_dictr,   r-   r.   gettrain_batch_sizer/   r0   loaded_checkpoint_mp_world_sizeloaded_checkpoint_dp_world_sizeitems)	r	   r
   r   r   r   r   r   r'   client_stater   r6   r   r   H   s@   





r   )TTT)r   )r   r%   r   r   r(   r   r   r   r   r   <module>   s   
%

