o
    TiY                     @   s  d dl mZ d dlmZ d dlZd dlZd dlZd dlZd dlmZ d dl	Z	d dl
Z
d dlZd dlZd dlZd dlmZ d dlmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z& dd Z'd	d
 Z(dd Z)dd Z*dd Z+dd Z,dd Z-d a.dd Z/dd Z0d@ddZ1dd Z2dd Z3dd  Z4d!d" Z5d#d$ Z6d%d& Z7d'd( Z8d)d* Z9d+d, Z:d-d. Z;d/d0 Z<d1d2 Z=d3d4 Z>d5d6 Z?d7d8 Z@d9d: ZAd;d< ZBd=d> ZCeDd?kre' ZEeCeE dS dS )A    )partial)chainN)ProcessPoolExecutor)DeepSpeedCheckpoint)OPTIMIZER_STATE_DICT
ZERO_STAGEBASE_OPTIMIZER_STATESINGLE_PARTITION_OF_FP32_GROUPSPARAM_GROUPSPARAM_SLICE_MAPPINGSPARAM_SHAPESPARAMCAT_DIMPARAM_N_SUB_PARAMSSUB_PARAM_SHAPEVOCAB_TENSORUNIVERSAL_CHECKPOINT_INFO UNIVERSAL_CHECKPOINT_VERSION_KEY"UNIVERSAL_CHECKPOINT_VERSION_VALUEVOCABULARY_PARAMETER_PATTERNS&PIPELINE_REPLICATED_PARAMETER_PATTERNS TP_REPLICATED_PARAMETER_PATTERNSPARAMETER_TO_AVERAGE_PATTERNS'PARAMETER_WITH_ROW_PARALLELISM_PATTERNS%PARAMETER_WITH_2_SUB_PARAMS_CAT_DIM_0PARAMETER_WITH_SUB_PARAMSSubparamShapec                  C   s   t  } | jdtddd | jdtddd | jddtd	d
 | jddtdd
 | jdddd | jddddd | jdddd |  }td|  |S )Nz--input_folderTz!Input DeepSpeed Checkpoint folder)typerequiredhelpz--output_folderz"Output DeepSpeed checkpoint folderz--num_extract_workers   z2How many parallel processes to extract zero shards)defaultr   r   z--num_merge_workers   zrHow many parallel processes to merge tp slices (more memory intensive, use much fewer than --num_extract_workers))z--keep_temp_folder
store_truezWPreserve temporary folder of intermediate checkpoint slice files. Useful for debugging.)actionr   z--no_strictstrictstore_falsez7Do not perform validity checks on converted checkpoint.)destr$   r   z--inject_missing_statezDInject missing checkpoint state into the checkpoint if it is absent.zargs = )argparseArgumentParseradd_argumentstrint
parse_argsprint)parserargs r1   X/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/checkpoint/ds_to_universal.pyparse_arguments2   s<   r3   c                 C   s   |   rt| S | S N)isdigitr,   textr1   r1   r2   atoiP   s   r8   c                 C   s   dd t d| D S )z
    alist.sort(key=natural_keys) sorts in human order
    http://nedbatchelder.com/blog/200712/human_sorting.html
    (See Toothy's implementation in the comments)
    c                 S      g | ]}t |qS r1   )r8   ).0cr1   r1   r2   
<listcomp>Z       z natural_keys.<locals>.<listcomp>z(\d+))resplitr6   r1   r1   r2   natural_keysT   s   r@   c           
   	   C   s   g }d|d}t d|D ]7}|g  t d|D ]*}|dkr%d|dn	d|dd|d}tj|d	}	|| tj| ||	 qq|S )
Niter_07dr      mp_rank_02d_03dzmodel_optim_rng.pt)rangeappendospathjoin)
base_folder	iteration	tp_degree	pp_degree	path_listiter_folderijrank_folder	ckpt_pathr1   r1   r2   _create_checkpoint_paths]   s   
(rW   c                 C   s.   t j| \}}t j|dd t||  d S )NTexist_ok)rJ   rK   r?   makedirstorchsave)	file_pathchkpt_sddirrF   r1   r1   r2   _save_checkpointj   s   r`   c                    s  |\}}}|j |||d}|t }|t }|t}	|	tg }
|t d }|t }t	|}t
|D ]R}t|| d || d || d}d|| v rS|| d |d< ||  D ]*\ }|dkrmt fdd	|
D rmqY| D ]}t| |||||  |j|j qqqYq2d S )
Npp_indextp_indexdp_indexstateexp_avg
exp_avg_sqrf   rg   fp32stepr   c                 3   s    | ]	}t | V  qd S r4   r>   match)r:   patternnamer1   r2   	<genexpr>       z&extract_zero_shards.<locals>.<genexpr>)get_zero_checkpoint_stater   r   get_checkpoint_infor   getr   r   r	   lenrH   dictitemsanykeysdump_param_fragmentstartnumel)r_   ds_checkpoint
indices_3Drb   rc   rd   sdoptim_sdparam_slice_mappingsuniversal_checkpoint_infopipeline_replicated_paramsstate_groupsfp32_groupsparam_groups_cntparam_group_id
flat_statefragment_mapping	state_keyr1   rn   r2   extract_zero_shardsp   s6   



r   c                 C   s   t j| | ddd}t|t d d d d |t d d d d |t d	 d d
}d}| D ]2\}}	|	 }
t|
|\}}t|t|
||  }|	 D ]}t
|d|||| ||| qO||7 }q1d S )NcpuFmap_locationweights_onlyoptimizer_state_dictre   r   rf   rg   fp32_flat_groupsrh   )r[   loadrv   r   rw   r|   _zero_partitioned_param_infominabsry   rz   )optim_filesparam_shapes	dp_degreetemp_dirrd   
state_dictr   offsetro   shapeunpartitioned_numelpartitioned_numelrF   padding_free_numelr   r1   r1   r2   extract_zero_shards_stage3   s"   
r   c                 C   s   | dS )Nz0>2dr1   )rd   r1   r1   r2   dp_index_to_str   s   r   c           
      C   sx   t j| |t|}t j|dd td7 at j|| dt| }	|dkr5t|r5|	d||
 }t|	| d S )NTrX   rC   .rj   r   )rJ   rK   rL   r+   rZ   cntr   r[   	is_tensornarrowcloner`   )
r_   rc   rd   
state_namestate_flat_tensor
param_namer   r|   param_base_pathrK   r1   r1   r2   rz      s   rz   c                    s(  g }t |D ]}tj| t||  t  d}t|dkr"qt  d}t	 }|D ]}	|
|	}
|
rC|t|
d q/td|	  fddtt|D }dd |D |d	krvtfd
dD sqJ dd }n|d u rtjdd}n
tjdd|}|| q|S )Nz.*r   z
\.([0-9]+)rC   zCannot parse dp_rank from c                    s   g | ]}  d t | qS )r   )r   )r:   rd   )prefix_pathr1   r2   r<          z&_merge_zero_shards.<locals>.<listcomp>c                 S   s   g | ]	}t j|d dqS )F)r   )r[   r   )r:   pr1   r1   r2   r<          rj   c                 3   s    | ]	}| d  kV  qdS )r   Nr1   )r:   v)shardsr1   r2   rp      rq   z%_merge_zero_shards.<locals>.<genexpr>z(All shards must have the same step valuedim)rH   rJ   rK   rL   r+   globru   r>   compilesetrl   addr,   group
ValueErrorsortedlistallr[   catreshaperI   )r   re   rO   slice_shapeslicesrc   pathsrm   
dp_indicesr   mslicer1   )r   r   r2   _merge_zero_shards   s.   

r   c                    s,  |\}}t j||}t j||}| t}	|	tg }
|	tg }|	tg }|	t	g }|	t
g }|	tg t|
| | | | tdd D  fdd}fdd}||}t|d|}|rztt j|d|d	  d
D ]}t|||t j|| d}i }||
|rtdkrtfdddd  D sJ d	 }n|||rtt }n|||rd	  fddD }tjdd |D  d}tjdd |D  d}tj||g d} |t< d|t< nw|r]g }|j|j }t|ts|f}dd |jD fddtD fddD d	|D ]}| |tjfddD d 7 q2tj|d}||t< n|||rednd	 tj d} |t< |||r|	d }|d |d d f }d|t < ||t!< t|| q|S )Nc                 s   s     | ]}t di |jV  qd S Nr1   )r   patternsr:   sr1   r1   r2   rp      s    z"merge_tp_slices.<locals>.<genexpr>c                    sP    fdd| D }t |dksJ d| d  |r&|d }| |S d S )Nc                    s   g | ]
}t | r|qS r1   rk   )r:   pattern_name_r1   r2   r<          z@merge_tp_slices.<locals>.get_matched_pattern.<locals>.<listcomp>rC   z$Got more than one matching patterns=z for r   )ru   discard)	patterns_r   matched_r   )unmatched_patternsr   r2   get_matched_pattern   s    
z,merge_tp_slices.<locals>.get_matched_patternc                    sJ    D ] }t di |}|jD ]}t|| r!| |    S qqd S r   )r   r   r>   rl   r   )r   subparam_shape_dictsubparam_shaper   )parameter_with_sub_paramsr   r1   r2   get_matched_sub_params_pattern  s   

z7merge_tp_slices.<locals>.get_matched_sub_params_patternrj   zstep.ptr   ri   rf   rg   .ptrC   c                    s   g | ]	} d   |qS r   )equal)r:   other_slice)r   r1   r2   r<     r   z#merge_tp_slices.<locals>.<listcomp>c                    s   g | ]
}t j|d  dqS )r"   r   )r[   chunkr   )cat_dimr1   r2   r<   #  r   c                 S      g | ]}|d  qS r   r1   r   r1   r1   r2   r<   $  r=   r   c                 S   r   )rC   r1   r   r1   r1   r2   r<   %  r=   r"   c                 S   s"   g | ]}t |trt|n|qS r1   )
isinstancetuplesum)r:   dr1   r1   r2   r<   1  s   " c                    s$   g | ]\}}| kr| n|qS r1   r1   )r:   rS   r   )partition_dimrO   r1   r2   r<   2     $ c                    s   g | ]}|  qS r1   )viewr   )partition_shaper1   r2   r<   3  s    c                    s   g | ]	}|  qS r1   )r   r   )r   part_sub_dim_sizer   r1   r2   r<   9  r   original_vocab_sizeT)"rJ   rK   rL   rs   r   rt   r   r   r   r   r   r   r   updater   from_iterabler   r`   ru   r   r   r[   r   r   r   r   r   r   r   	enumeraterI   r   r   r   )r}   r_   	slice_dirrO   name_and_shapero   r   slice_base_pathr   r   replicated_parametersparameters_to_averageparameters_with_row_parallelismvocabulary_parameters&parameters_with_2_sub_params_cat_dim_0r   r   matched_sub_params_shapestep_mergedre   
final_path	ckpt_dictparamchunked_slicesmerged_chunks_0merged_chunks_1merged_chunkssub_dim_sizessub_dim_sizer   r1   )	r   r   r   r   r   r   r   rO   r   r2   merge_tp_slices   s   
		

"





r   c           	      C   sX   t j||}t j||}dD ]}t||d}t j|| d}t||d  qd S )Nr   rC   r   r   )rJ   rK   rL   r   r`   )	r   r_   r   ro   r   r   re   r   r   r1   r1   r2   merge_zero3_slicesR  s   r   c                    s   g }|dkr7t |d" fdd|D }t|D ]	}||  qW d    |S 1 s0w   Y  |S t|D ]	}| | q<|S )NrC   )max_workersc                    s   g | ]}  |qS r1   )submit)r:   workdo_workexecutorr1   r2   r<   `  s    z%_do_parallel_work.<locals>.<listcomp>)r   tqdmrI   result)r  work_chunksnum_workersresultsfuture_listfr  r1   r  r2   _do_parallel_work\  s   
		r  c                 C   sB   t tt|jt|jt|j}tt||}t	||| j
 d S r4   )r   	itertoolsproductrH   rP   rO   r   r   r   r  num_extract_workers)r0   r}   r   _3d_range_listr  r1   r1   r2   _extract_zero_shard_filesk  s   r  c                 C   s*   t t||||}t|tt|| j d S r4   )r   r   r  r   rH   r  )r0   r   r   r   r   r  r1   r1   r2    _extract_zero_shard_files_stage3u  s   r  c           	      C   s   t j| jd}tt||||j}t|t|	 | j
}dd |D }ttj| }| jr9|r7J d| dd S |rEtd| d d S d S )Nzeroc                 S   r9   r1   )r   )r:   lstr1   r1   r2   r<     r=   z)_merge_tp_slice_files.<locals>.<listcomp>zUnused patterns=z while merging tp sliceszWarning: Unused patterns=)rJ   rK   rL   output_folderr   r   rO   r  r   rw   num_merge_workersr   intersectionr%   r.   )	r0   r}   slice_shapesr   zero_output_folderr  unmatched_patterns_listssetsr   r1   r1   r2   _merge_tp_slice_filesz  s   r  c                 C   s4   t j| jd}tt|||}t|| | j d S )Nr  )	rJ   rK   rL   r  r   r   r  ry   r  )r0   r   r   r   r  r  r1   r1   r2   _merge_zero3_slice_files  s   r  c                 C   s.   | | }|r
|| nd}t | | }||fS )Nr   )mathceil)r   
world_size	remainderpadding_numelr   r1   r1   r2   r     s   r   c                 C   s   t j| d t dddt S Nr   r   Fr   )r[   r   devicer   )filesr1   r1   r2   _parse_model_states_stage3  s   r'  c                    st   t ttg |jdddd}|t } fdd| D }|t  t |t< tj	| j
d}tj	|d}t|| d S )Nr   ra   c                    s   i | ]\}}| vr||qS r1   r1   )r:   kr   sharded_statesr1   r2   
<dictcomp>  r   z)_save_optimizer_state.<locals>.<dictcomp>r  optimizer_state.pt)r   r   r	   rr   r   rw   r
   rJ   rK   rL   r  r`   )r0   r}   r   r   	output_sdr  output_file_pathr1   r)  r2   _save_optimizer_state  s   
r/  c                 C   s^   t j|d t ddd}|t }|t t |t< tj| jd}tj|d}t	|| d S )Nr   r   Fr   r  r,  )
r[   r   r%  r   r
   rJ   rK   rL   r  r`   )r0   r   r   r-  r  r.  r1   r1   r2   _save_optimizer_state_stage3  s   r0  c                 C   
   t | dS )Nz*_optim_states.pt_get_checkpoint_filescheckpoint_dirr1   r1   r2   _get_optim_files     
r6  c                 C   r1  )Nz*_model_states.ptr2  r4  r1   r1   r2   _get_model_state_files  r7  r8  c                 C   sB   t ttj| |td}t|dkrtd| d|  d|S )N)keyr   zcan't find z files in directory '')r   r   rJ   rK   rL   r@   ru   FileNotFoundError)r5  glob_pattern
ckpt_filesr1   r1   r2   r3    s   r3  c                 C   s2   t j| d t ddd}|t }|td}|S )Nr   r   Fr   rC   )r[   r   r%  r   rt   r   )r   r   optimizer_state
zero_stager1   r1   r2   _get_zero_stage  s   r@  c                 C   sR   t | jvr%tj| jd tddd}t |vr'i | jt < t| jt  t< d S d S d S r$  )r   global_stater[   r   mp_rank_filesr%  r   r   )r}   r   r1   r1   r2   _inject_missing_state  s   

rC  c                 C   s&   |  t}|d usJ dt dd S )Nz	Required zG state is missing in checkpoint. Verify that client creates this state.)rs   r   )r}   r   r1   r1   r2   _check_for_required_state  s   
rD  c                 C   s`  t d t d| j d| j  t| j}t|}|dkrt| j}| jr*t| nt| |	 }t
| j||j|j}g }|jD ]}tj|tddd}||t 7 }qAtdd	 |D }tj| jd
}	t d t| ||	 t d t| |||	 t d t| | | jstj|	dd ttj| jdD ]	}
t|
| j qn[t| j}t|}dd |D }t |}tj| jd
}	t d t!| ||||	 t d t"| |||	 t d t#| | | jstj|	dd ttj| jdD ]	}
t|
| j qtj$| j\}}tj|d}t%|d}
|
&| W d    n	1 s%w   Y  t d d S )Nz4Convert DeepSpeed Checkpoint to Universal Checkpointz#Converting DeepSpeed checkpoint in z to Universal checkpoint in r"   r   Fr   c                 s   s*    | ]}|  D ]	\}}||fV  qqd S r4   rw   r:   r   r(  r   r1   r1   r2   rp     s   ( zmain.<locals>.<genexpr>tmpz *** 1. Extracting ZeRO fragmentsz*** 2. Merging slices .....z%*** 3. Saving common optimizer statesT)ignore_errorszmp*c                 S   s$   i | ]}|  D ]\}}||qqS r1   rE  rF  r1   r1   r2   r+    r   zmain.<locals>.<dictcomp>z*model_states.ptlatest_universalwz	*** Done!)'r.   input_folderr  r6  r@  r   inject_missing_staterC  rD  get_iterationrW   rO   rP   rB  r[   r   r%  r   rv   rJ   rK   rL   r  r  r/  keep_temp_foldershutilrmtreer   copy2r8  r'  ru   r  r  r0  r?   openwrite)r0   r   r?  r}   rN   checkpoint_pathsr  mp_rank_filemp_sdr   r  model_filesr   r   checkpoint_root_folderstep_folderlatest_filer1   r1   r2   main  sf   






r[  __main__r4   )F	functoolsr   r  r   r(   r   r  concurrent.futuresr   rJ   r>   rO  r[   r  deepspeed.checkpointr   r   r   r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r3   r8   r@   rW   r`   r   r   r   r   rz   r   r   r   r  r  r  r  r  r   r'  r/  r0  r6  r8  r3  r@  rC  rD  r[  __name__r0   r1   r1   r1   r2   <module>   s^   d	(
"j

			N