o
    Ti'3                     @   s   d dl Z d dlZd dlmZ d dlZddlmZ ddlmZm	Z	m
Z
mZmZ ddlmZmZ ddlmZmZ ddlmZ dd	lT d Zd
ZdZdZdZdZg dZdddZG dd deZdS )    N)Dict   )model_3d_desc)basic_folder_validationmerge_statepartition_data	get_filesget_files_with_prefix)MODEL_FILE_PREFIXLAYER_FILE_PREFIX)reshape_meg_2d_parallelmeg_2d_parallel_map)ZeROCheckpoint)*argscheckpoint_info	iterationzlayer_(\d+)-model_.*)zinput_layernorm.weightzinput_layernorm.biaszself_attention.dense.biaszpost_attention_layernorm.weightzpost_attention_layernorm.biaszmlp.dense_4h_to_h.biaszposition_embeddings.weight)zself_attention.dense.weightzmlp.dense_4h_to_h.weightc                   @   s  e Zd ZdddefddZdd Zdd Zdd	 Zd
d Zdd Z	dd Z
dd Zdd Zdd ZdefddZdefddZdd Zdd Zdd  Zd!edefd"d#Zd!edefd$d%Zd&d' Zd(d) Zefd*d+Zd!ed,edefd-d.Zd!ed,edefd/d0Zd,edefd1d2Zd!edefd3d4Z d!edefd5d6Z!d7efd8d9Z"d!ed,edefd:d;Z#d<d= Z$dLd>d?Z%d@dA Z&dBdC Z'dDdE Z(dFdG Z)dHdI Z*dJdK Z+dS )MDeepSpeedCheckpointNc                 C   s  || _ || _ttt|tdk}| || t|| _t|| _	t| j	t| _
t| j	t| _|  | _t| j| _|d u rC| j n|| _|d u rO| j n|| _|d u r[| j n|| _| j | j  | j  | _| j| j | j | _t| j | j | _| j  t| j | j | j| jd| _|  s|  s|  r| j t!| j| j| j i | _"| #  | $ | _%| & | _'| (t)| _*| (| j | _+| ,  d S )Nr   )old_pp_degreeold_tp_degreenew_pp_degreenew_tp_degree)-final_layer_norm_idxdirlenr	   r   r   _validate_folderr   zero_checkpoint	file_listlayer_filesr
   mp_rank_files_get_layer_keys
layer_keyslayer_countget_src_tp_degree	tp_degreeget_src_pp_degree	pp_degreeget_src_dp_degree	dp_degreeoriginal_world_size
world_sizer   
old_2d_mapsimple_initr   
new_2d_mapis_change_pp_degreeis_change_tp_degreeis_change_dp_degreereshaper   global_state_sanity_check_build_pp_transformer_mappp_to_transformer_map_build_transformer_file_maptransformer_file_map_build_tp_other_layer_mapEMBEDDING_LAYER_INDEXtp_to_embedding_maptp_to_final_norm_map_build_global_state)selfr   r%   r'   r)   r   pipeline_parallel r@   ]/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/checkpoint/deepspeed_checkpoint.py__init__%   sF   







zDeepSpeedCheckpoint.__init__c                 C      | j | j kS N)r%   r   r$   r>   r@   r@   rA   r0   W      z'DeepSpeedCheckpoint.is_change_tp_degreec                 C   rC   rD   )r'   r   r&   rE   r@   r@   rA   r/   Z   rF   z'DeepSpeedCheckpoint.is_change_pp_degreec                 C   rC   rD   )r)   r   r(   rE   r@   r@   rA   r1   ]   rF   z'DeepSpeedCheckpoint.is_change_dp_degreec              	   C   s\   t d t| jD ]}t| jD ]}| j||d}t d| d| d|  qq	t d d S )Nzreshaped 2d map ---- beginpp_indextp_index[z, z] = zreshaped 2d map ---- end)printranger'   r%   get_2d_parallel_files)r>   ijr   r@   r@   rA   show_2d_mapping`   s   z#DeepSpeedCheckpoint.show_2d_mappingc                 C      |  | jd d S )Ntp_to_embedding_layers)_dump_mappingr;   rE   r@   r@   rA   show_tp_embedding_mapj      z)DeepSpeedCheckpoint.show_tp_embedding_mapc                 C   rQ   )Ntp_to_final_norm_layers)rS   r<   rE   r@   r@   rA   show_tp_final_norm_mapm   rU   z*DeepSpeedCheckpoint.show_tp_final_norm_mapc                 C   rQ   )Npp_to_transformer_layers)rS   r6   rE   r@   r@   rA   show_pp_transformer_mapp   rU   z+DeepSpeedCheckpoint.show_pp_transformer_mapc                 C   rQ   )Nrank_to_transformer_files)rS   r8   rE   r@   r@   rA   show_transformer_file_maps   rU   z-DeepSpeedCheckpoint.show_transformer_file_mapc                 C   sD   t j| jd t ddd}|td| jt< |td | jt< d S Nr   cpuFmap_locationweights_only)torchloadr    devicegetITERATION_KEYr3   ARGS_KEYr>   sdr@   r@   rA   r=   v   s   z'DeepSpeedCheckpoint._build_global_statereturnc                 C   s   | j j|||tgdS )N)rH   rI   dp_indexkeys_to_ignore)r   get_state_for_rankPARAM_SHAPESr>   rH   rI   rj   r@   r@   rA   get_zero_checkpoint_state{   s
   z-DeepSpeedCheckpoint.get_zero_checkpoint_statec                 C   s   | j j|||dS )N)rH   rI   rj   )r   get_files_for_rankrn   r@   r@   rA   get_zero_files   rU   z"DeepSpeedCheckpoint.get_zero_filesc                 C   s
   | j t S rD   )r"   r:   rE   r@   r@   rA   get_embedding_layer_id      
z*DeepSpeedCheckpoint.get_embedding_layer_idc                 C   s   | j | j S rD   )r"   r   rE   r@   r@   rA   get_final_norm_layer_id   s   z+DeepSpeedCheckpoint.get_final_norm_layer_idc                 C   sB   t | jvrtj| jd tddd}|t d| jt < | jt  S r\   )re   r3   ra   rb   r    rc   rd   rg   r@   r@   rA   get_iteration      

z!DeepSpeedCheckpoint.get_iterationrI   c                 C   s4   || j  v s	J dd | j | D }| |}|S )Nc                 S   "   g | ]}t j|t d ddqS r]   Fr^   ra   rb   rc   .0fnamer@   r@   rA   
<listcomp>   s    z;DeepSpeedCheckpoint.get_embedding_state.<locals>.<listcomp>)r;   keys_merge_state_dicts)r>   rI   sd_listrh   r@   r@   rA   get_embedding_state   s   
z'DeepSpeedCheckpoint.get_embedding_statec                 C      || j  v s	J | j | S rD   )r;   r~   r>   rI   r@   r@   rA   get_embedding_files      
z'DeepSpeedCheckpoint.get_embedding_filesc                 C   sB   || j vrtj| jd tddd}||d | j |< | j | S r\   )r3   ra   rb   r    rc   rd   )r>   keyrh   r@   r@   rA   _get_checkpoint_value   rv   z)DeepSpeedCheckpoint._get_checkpoint_valuec                 C   s
   |  tS rD   )r   rf   rE   r@   r@   rA   get_args   rs   zDeepSpeedCheckpoint.get_argsc                 C   s
   |  |S rD   )r   )r>   info_keyr@   r@   rA   get_checkpoint_info   rs   z'DeepSpeedCheckpoint.get_checkpoint_inforH   c                 C   sb   || j k sJ || jk sJ | j||d}dd |D }d }|D ]}|d u r)|}q t||}q |S )N)rI   rH   c                 S   rw   rx   ry   rz   r@   r@   rA   r}         " z=DeepSpeedCheckpoint.get_2d_parallel_state.<locals>.<listcomp>)r%   r'   rM   r   )r>   rI   rH   
fname_listr   	merged_sdrh   r@   r@   rA   get_2d_parallel_state   s   z)DeepSpeedCheckpoint.get_2d_parallel_statec                 C   sZ   || j k sJ || jk sJ g }| j||f D ]}dd |D }| |}|| q|S )Nc                 S   rw   rx   ry   rz   r@   r@   rA   r}      r   z=DeepSpeedCheckpoint.get_transformer_state.<locals>.<listcomp>)r%   r'   r8   r   append)r>   rI   rH   t_listr   r   rh   r@   r@   rA   get_transformer_state   s   
z)DeepSpeedCheckpoint.get_transformer_statec                 C   s   || j k sJ | j| S rD   )r'   r6   )r>   rH   r@   r@   rA   get_pp_transformer_map   s   
z*DeepSpeedCheckpoint.get_pp_transformer_mapc                 C   s6   || j  v s	J tj| j | d tddd}|S r\   )r<   r~   ra   rb   rc   )r>   rI   rh   r@   r@   rA   get_final_norm_state   s    z(DeepSpeedCheckpoint.get_final_norm_statec                 C   r   rD   )r<   r~   r   r@   r@   rA   get_final_norm_files   r   z(DeepSpeedCheckpoint.get_final_norm_fileslayer_indexc                 C   s\   i }t | jdk r|S |t | jksJ t| j| j| }t|| j}dd t|D }|S )Nr   c                 S   s   i | ]\}}||qS r@   r@   )r{   rN   flistr@   r@   rA   
<dictcomp>       zADeepSpeedCheckpoint._build_tp_other_layer_map.<locals>.<dictcomp>)r   r   r	   r"   r   r%   	enumerate)r>   r   data_mapr   layer_file_partitionsr@   r@   rA   r9      s   z-DeepSpeedCheckpoint._build_tp_other_layer_mapc                    s>   | j k sJ | jk sJ  jj||d} fdd|D S )NrG   c                    s   g | ]} j | qS r@   )r    r{   rN   rE   r@   rA   r}      r   z=DeepSpeedCheckpoint.get_2d_parallel_files.<locals>.<listcomp>)r%   r'   r.   get_data)r>   rI   rH   file_indicesr@   rE   rA   rM      s   z)DeepSpeedCheckpoint.get_2d_parallel_filesc                    sL   i }| j dkr$| jd| j t| j    fddtd| j D }|S )Nr   r   c                    s&   i | ]}||  |d     qS )r   r@   r   layers_per_pptransformer_layersr@   rA   r      s    zADeepSpeedCheckpoint._build_pp_transformer_map.<locals>.<dictcomp>)r'   r"   r   r   rL   )r>   r   r@   r   rA   r5      s   

z-DeepSpeedCheckpoint._build_pp_transformer_mapc                 C   s>   |d urt d|  | D ]\}}t | d|  qd S )NzDump mapping: z = )rK   items)r>   r   map_tagkvr@   r@   rA   rS      s
   z!DeepSpeedCheckpoint._dump_mappingc                 C   s   | j d| j }i }d}| jdkrt|| j }t|D ]5\}}|| }t| j|d }t|| j}t	| jD ]}	|	|f}
|
|
 vrGg ||
< ||
 ||	  q7q|S )Nr   r   -)r"   r   r'   r   r   r	   r   r   r%   rL   r~   r   )r>   transformer_layer_keysfile_mapr   	key_index	layer_keyrH   r   r   rI   map_keyr@   r@   rA   r7      s    
z/DeepSpeedCheckpoint._build_transformer_file_mapc                 C   sN   t | j| j dksJ | jj| j| j  dksJ | jj| j dks%J d S )Nr   )r   r    r%   r   	num_filesr'   rE   r@   r@   rA   r4     s   z!DeepSpeedCheckpoint._sanity_checkc                 C   s,   | j D ]}tj|std| d qd S )NzError: z is not existent)r   ospathisfilerK   )r>   filer@   r@   rA   validate_files  s
   
z"DeepSpeedCheckpoint.validate_filesc                 C   s`   t  }| jD ]}tj|\}}tt|d}|	| qt
t|td}dd |D }|S )Nr   r   c                 S   s   g | ]}t t| qS r@   )r   str)r{   layer_idr@   r@   rA   r}     s    z7DeepSpeedCheckpoint._get_layer_keys.<locals>.<listcomp>)setr   r   r   splitresearchLAYER_FILE_PREFIX_PATTERNgroupaddsortedlistint)r>   key_set	file_path_r|   r   
sorted_idsr"   r@   r@   rA   r!     s   
z#DeepSpeedCheckpoint._get_layer_keysc                    s`   i }|d   D ]%  tvr%t d}tj fdd|D |d| < q|d   | < q|S )Nr   c                    s   g | ]}|  qS r@   r@   )r{   rh   r   r@   rA   r}   "  s    z:DeepSpeedCheckpoint._merge_state_dicts.<locals>.<listcomp>)dim)r~   SEQUENTIAL_LAYERSLAYER_CONCAT_DIMrd   ra   cat)r>   r   r   cat_dimr@   r   rA   r     s   "z&DeepSpeedCheckpoint._merge_state_dictsc                 C   sf   t | t|}tg}|r|tt dg |D ]}t||}t|dks0J | d| dqd S )N01r   z8 seems a bogus DeepSpeed checkpoint folder: Cannot find z* files in there.)r   r   r
   extendr   r	   r   )r>   r   r?   r   file_prefix_listfile_prefix
ckpt_filesr@   r@   rA   r   (  s   
z$DeepSpeedCheckpoint._validate_folderrD   ),__name__
__module____qualname__FINAL_LAYER_NORM_INDEXrB   r0   r/   r1   rP   rT   rW   rY   r[   r=   dictro   r   rq   rr   rt   ru   r   r   r   r   r   r   CHECKPOINT_INFO_KEYr   r   r   r   r   r   r9   rM   r5   rS   r7   r4   r   r!   r   r   r@   r@   r@   rA   r   #   sP    
2
	



r   )r   r   typingr   ra   reshape_3d_utilsr   reshape_utilsr   r   r   r   r	   	constantsr
   r   reshape_meg_2dr   r   r   r   r:   r   rf   r   re   r   r   r   objectr   r@   r@   r@   rA   <module>   s&   
