o
    TiG                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlmZmZ d dlm	Z	 d dl
mZ ddlmZ dZG dd	 d	ZG d
d deZG dd deZdS )    N)ABCabstractmethod)logger)TorchCheckpointEngine   )WeightQuantizationautoc                   @   s&   e Zd Zedd ZedddZdS )SDLoaderFactoryc           	      C   s   t | trt| }t|}W d    n1 sw   Y  n	t | ts&J | }|d }|d }|d }|dd}|dd}| dv rH|S t	||||S )	Ntypecheckpointsversionparallelizationppmp_sizer   )bloomds_model)

isinstancestropenjsonloaddictgetlowerr	   get_sd_loader)		json_filecheckpoint_enginefdatasd_type	ckpt_listr   	ckpt_typer    r"   X/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/runtime/state_dict_factory.pyget_sd_loader_json   s   

z"SDLoaderFactory.get_sd_loader_jsonMegatronNc                 C   s"   |dkr
t | ||S J d|)Nr%   Fz#{} checkpoint type is not supported)MegatronSDLoaderformat)r    r   r   r   r"   r"   r#   r   (   s   zSDLoaderFactory.get_sd_loader)r%   N)__name__
__module____qualname__staticmethodr$   r   r"   r"   r"   r#   r	      s
    
r	   c                   @   s~   e Zd Zdd ZedddddfddZd	d
 Zdd Zdd Zdd Z	dd Z
dd Zedd Zedd Zedd ZdS )SDLoaderBasec                 C   s2   d | _ || _|| _|d u rt n|| _|   d S N)
module_keyr    r   r   r   check_ckpt_listselfr    r   r   r"   r"   r#   __init__2   s
   zSDLoaderBase.__init__F   @   Tc	                 C   s   || _ t| j}	||	 | }
	 |r|d ur||	kr|	}d}
| j|
 }d}|	|krXtj|s0J | jj|dd d}|rUt||d}|	| 
|||\}}| || n!d }n|	|krj| ||||||\}}}n| ||||||\}}||||ffS )Nr   r   c                 S      | S r-   r"   storagelocr"   r"   r#   <lambda>a   s   z#SDLoaderBase.load.<locals>.<lambda>map_locationmlp_extra_groupingr   )r.   lenr    ospathexistsr   r   r   sd_quantize_megatron
get_module
set_modulemerge_state_dictsplit_state_dict)r1   mp_world_sizemp_rankr.   is_pipe_parallelquantizequantize_bitsquantize_groupsr=   num_ckptidx	load_pathmerge_countsd	quantizer	sd_module
all_scalesr"   r"   r#   r   9   s6   	


zSDLoaderBase.loadc                    sv   t  j}|| dksJ d|| } fddt|| ||d  D }td| d|   fdd|D }|S )	Nr   z/Invalid checkpoints and world size for sd mergec                    s   g | ]} j | qS r"   )r    ).0ir1   r"   r#   
<listcomp>x       z6SDLoaderBase.get_merge_state_dicts.<locals>.<listcomp>r   	mp_rank: , ckpt_list: c                    s    g | ]} j j|d d dqS )c                 S   r5   r-   r"   r6   r"   r"   r#   r9   {       z?SDLoaderBase.get_merge_state_dicts.<locals>.<listcomp>.<lambda>r:   )r   r   )rU   ckptrW   r"   r#   rX   {   s     )r>   r    ranger   info)r1   rG   rH   rM   num_to_merger    sd_listr"   rW   r#   get_merge_state_dictss   s   
$z"SDLoaderBase.get_merge_state_dictsc                 C   s|   t | j}|| dksJ d|| }|| }|| }td| d| j|  d|  | jj| j| dd d}|||fS )	Nr   z/Invalid checkpoints and world size for sd splitrZ   r[   z
, offset: c                 S   r5   r-   r"   r6   r"   r"   r#   r9      r\   z3SDLoaderBase.get_split_state_dict.<locals>.<lambda>r:   )r>   r    r   r_   r   r   )r1   rG   rH   rM   num_to_split
ckpt_indexckpt_offsetrQ   r"   r"   r#   get_split_state_dict~   s   
"
z!SDLoaderBase.get_split_state_dictc                 C   sL   d|v rd|v rJ dd|v sd|v sJ dd|v rdS d|v r$dS d S )NmodulemodelzFcheckpoint has both 'model' and 'module' keys, not sure how to proceedzMcheckpoint contains neither 'model' or 'module' keys, not sure how to proceedr"   r1   rQ   r"   r"   r#   _choose_module_key   s   zSDLoaderBase._choose_module_keyc                 C   s0   | j d u r|S | j tkr|| | S || j  S r-   r.   AUTO_MODULE_KEYrj   ri   r"   r"   r#   rC      s
   


zSDLoaderBase.get_modulec                 C   s<   | j d u r	|}|S | j tkr||| |< |S ||| j < |S r-   rk   )r1   rQ   rg   r"   r"   r#   rD      s   


zSDLoaderBase.set_modulec                 C   sp   t | jdks	J | jj| jd dd d}d| v r4t | j|d ks6J dt | j d|d  d S d S )Nr   c                 S   r5   r-   r"   r6   r"   r"   r#   r9      r\   z.SDLoaderBase.check_ckpt_list.<locals>.<lambda>r:   rG   zcheckpoint count z' is different from saved mp_world_size )r>   r    r   r   keysri   r"   r"   r#   r/      s   
zSDLoaderBase.check_ckpt_listc                 C      d S r-   r"   r1   rG   rH   rJ   rK   groupsr=   r"   r"   r#   rE         zSDLoaderBase.merge_state_dictc                 C   rn   r-   r"   ro   r"   r"   r#   rF      rq   zSDLoaderBase.split_state_dictc                 C   rn   r-   r"   )r1   ckpt_file_namer"   r"   r#   sanity_check   rq   zSDLoaderBase.sanity_checkN)r(   r)   r*   r2   rl   r   rb   rf   rj   rC   rD   r/   r   rE   rF   rs   r"   r"   r"   r#   r,   0   s*    

:		

r,   c                       s`   e Zd Z fddZdd Zdd Z					
dddZ					
dddZdd Zdd Z	  Z
S )r&   c                    s   t  ||| d S r-   )superr2   r0   	__class__r"   r#   r2      s   zMegatronSDLoader.__init__c                    s   d}|dkrJ|d j d d dksJ |d j d d fdd|D }g }tdD ]  fdd|D }|tj|dd q+tj|dd}|S |dksR|d	kr[tj|dd}|S J d| d)  
        Up to now we found 3 Q/K/V parameter formats in different Megatron checkpoint versions:

        1. version 0, there is no version information saved in checkpoint.
            format: [(3 * np * hn), h]
        2. version 1.0
            format: [(np * hn * 3), h]
        3. version 2.0
            format: [(np * 3 * hn), h]

        h: hidden size
        n: number of attention heads
        p: number of model parallel partitions
        np: n/p
        hn: h/n
        Nr      c                    s   g | ]
}t j| d dqS )r   dim)torchsplit)rU   param)size_qkvr"   r#   rX      s    z:MegatronSDLoader.merge_query_key_value.<locals>.<listcomp>c                       g | ]}|  qS r"   r"   )rU   t)rV   r"   r#   rX          axis      ?       @Fcheckpoint version:  is not supported)shaper^   appendr{   cat)r1   
param_listckpt_vernew_qkvsplit_tensorstensorstensor_tupler"   )rV   r~   r#   merge_query_key_value   s   z&MegatronSDLoader.merge_query_key_valuec                 C   s  d}|dkrW|j d d dksJ |j d d }tj||dd}|d j d | dks-J |d j d | }g }	tdD ]}
|	tj||
 |dd|  q<tj|	dd}|S |dks_|dkr|j d | dksjJ |j d | }tj||dd}|| }|S J d	| d
)rw   Nr   rx   ry   r   r   r   Fr   r   )r   r{   r|   r^   r   r   )r1   r}   rc   offsetr   r   r~   r   
split_sizer   rV   r"   r"   r#   split_query_key_value  s&    
z&MegatronSDLoader.split_query_key_valueFr3   r4   Tc                    s   jd  ||}t|d }t }	fdd|D }
|
d  }|}t	
d|  |r=t||d}|D ]  fdd|
D }d v sRd v rh|r^|j||| d	d
}tj|d	d|	 < q?d v r|rd v r|j||| d}tj|dd|	 < q?|rtj|dd|	 < q?|||	 < q?d v sd v sd v r|rd v r|j||| d}tj|dd|	 < q?|d |	 < q?|r| }||	}||r|nd t|
fS )Nr   c                    s   g | ]}  |qS r"   )rC   rU   rQ   rW   r"   r#   rX   :  rY   z5MegatronSDLoader.merge_state_dict.<locals>.<listcomp>r   r<   c                    r   r"   r"   r   keyr"   r#   rX   C  r   attention.dense.weightmlp.dense_4h_to_h.weightr   )r   	merge_dimr   attention.query_key_value attention.query_key_value.weightr   mlp.dense_h_to_4h.weightword_embeddings.weightmlp.dense_h_to_4h.bias)rs   r    rb   copydeepcopycollectionsOrderedDictrm   get_checkpoint_versionr   r_   r   Quantizer{   r   r   merge_scalesrD   r>   )r1   rG   rH   rJ   rK   rp   r=   ra   ds_sdnew_client_sdclient_sd_listrm   r   rR   
value_listrT   r"   )r   r1   r#   rE   -  s@   
z!MegatronSDLoader.merge_state_dictc                 C   s  |  ||\}}}	t|}
t }| |}| |
}td|  |r,t	||d}|
 D ]}|| }d|v s>d|v rl|jd | dksIJ |jd | }|r_||g|||}|d }tj||dd|	 ||< q0d|v r|rd	|v r||g|||}|d }| |||	|||< q0d
|v sd|v sd|v sd|v r|jd | dksJ |jd | }|rd
|v r||g|||}|d }tj||dd|	 ||< q0|||< q0|r||}| |
|}
|
|r|fS d fS )Nr   r<   r   r   r   r   ry   r   r   r   r   r   zfinal_linear.weight)rf   r   r   r   r   rC   r   r   r_   r   rm   r   r   r{   r|   r   merge_scales_splitrD   )r1   rG   rH   rJ   rK   rp   r=   rQ   rc   re   r   r   	client_sdr   rR   r   valuer   q_valsrT   r"   r"   r#   rF   ^  sD   	


 

z!MegatronSDLoader.split_state_dictc                 C   sV   g d}| j j|dd d}dd }|D ]}||| |s(J d| d| qd S )	N)r   r   r   r   r   c                 S   r5   r-   r"   r6   r"   r"   r#   r9     r\   z/MegatronSDLoader.sanity_check.<locals>.<lambda>r:   c                 S   s,   |  }d}|D ]}| |v rd} |S q|S )NFT)rm   )partial_keyrQ   rm   foundkr"   r"   r#   check_key_exist  s   z6MegatronSDLoader.sanity_check.<locals>.check_key_existzkey: z  is not found in the checkpoint )r   r   rC   )r1   rr   keys_to_checkrQ   r   r   r"   r"   r#   rs     s   	zMegatronSDLoader.sanity_checkc                 C   s   | j d ur| j S |ddS )Ncheckpoint_versionr   )r   r   )r1   
state_dictr"   r"   r#   r     s   z'MegatronSDLoader.get_checkpoint_version)Fr3   r4   T)r(   r)   r*   r2   r   r   rE   rF   rs   r   __classcell__r"   r"   ru   r#   r&      s     &.
4
5r&   )r{   r?   r   r   r   abcr   r   deepspeed.utilsr   ;deepspeed.runtime.checkpoint_engine.torch_checkpoint_enginer   weight_quantizerr   rl   r	   r,   r&   r"   r"   r"   r#   <module>   s    