o
    TiJ<                     @   s   d dl mZ d dlmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlm  mZ d	d
lmZmZmZmZmZ d dl Z d dlZd dlmZ d dlZ		 	dddZdS )    )nn)DeepSpeedBloomInference)DeepSpeedGPTInference)DeepSpeedBERTInference)DeepSpeedMegatronGPTInference)DeepSpeedOPTInference)DeepSpeedLlama2InferenceN   )LinearLayer	NormalizeEmbeddingLayerOPTEmbeddingRMSNormalize)get_acceleratorc                    s  g 	fdd}| oj jdd 	fdd}	f	dd}
zd	d l}|jjjjt|jd
rA|jjj	j
nd W n   d Y zd	dlm m}m W n
   d  d }d Y zd	dlm W n   d Y i tj|	tj|	tj|	t|	t|	t|	tj|
t|
t|
t|
t|
t|
t|
|	t |	|	t!|	 |	||	|	|	i
i d 
fdd	 D ]}~qd t"#  d S )Nc                     s:    d   D ]} td| r dS td| r dS qdS )Nr   z	^model[.]Fz^transformer[.]T)keysrematch)key)sd [/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/module_inject/load_checkpoint.pyprefix_check    s   z0load_model_with_checkpoint.<locals>.prefix_checkc                 S   sn   t   |  } | ddd}| d| d }W d    n1 s&w   Y  | | jd | jd S )N)torchno_grad
contiguous	transposereshapecopy_shape)datadata1r   r   r   r   -   s   
z-load_model_with_checkpoint.<locals>.transposec                    s   d |i dg g  f}t | dr| jjd |d  | _|d d  v rQ| jjjrBtjj	j
tj| jjdd| jjjd| _| jjd |d  | _d }t  d S )Nr   Tweightbiascpu)device)r!   requires_grad)hasattrcopyr#   r!   r   r$   is_metar   r   	parameter	Parameter
empty_liker'   gccollect)moduleprefixargs)
error_msgs
mp_replacer   r   r   load5   s   

z(load_model_with_checkpoint.<locals>.loadc                    sj   dkr( fdd}|| | |   D ]\}}|||| d  qd S | d | d S )Ntpc              	      s  |   D ]@\ }  d v rEt ddkrEtd    tu rCd    \}}|}|t  }|jd 
_	nd    t  }d }|j}|j}|j
tjkradnd}|j
tjkrkdndt|dkrt|dkr|| |d kr| |d kr|j
tjkr

jr	|n|}ntjjj|dd}||_t|  | q|| |d kr|n}|| |d krdnd}	|| ||	 krtj|||	 |d t  }
|j
tjks| 
j	d  ksJ d|d	
j	d  d  
j	d	 }nl|j
tjksJ d
 fddttD }|d d|d   kr\| d fdd|D tjfddttd D |d}
ntjdd |D |d}
|j
tjkr{tjdd |D |d}|j
tjkr
jr
j	|
|dkrdnddn
|
}
ntjjj|
dd}
||
_t|  |
 q|d |d kr|j| q|d |d krt||d	  t   }|j| q|d djj  kr%|d d  fddttD |jtjfddttd D ddt    q|jtj fddttD ddt    qd S )Nr   .r	      F)r'   dimzERROR: We require the quantization scales for larger TP-size when loading INT8 checkpoint!                                           Please use the FP16 checkpoint to generate INT8 checkpoint with the sharding parameters!r   zMerging of the checkpoints are not supported when using INT8 checkpoint!                                           Please use a as many GPUs as TP-size for the checkpointc                    sP   g | ]$}t |    tu r|    n|    t  qS r   typelisttor   current_device_name.0jnr1   r   r   r   
<listcomp>n   s    8zgload_model_with_checkpoint.<locals>.load_transformer_layer.<locals>.load_parameters.<locals>.<listcomp>   c                    s"   g | ]}t j|d  j dqS r   r9   )r   splitr!   )rA   src)	outer_dimqkv_sizer   r   rE   u   s    c                    s(   g | ] t j fd dD dqS )c                       g | ]}|  qS r   r   rA   qkv_sir   r   rE   z       rload_model_with_checkpoint.<locals>.load_transformer_layer.<locals>.load_parameters.<locals>.<listcomp>.<listcomp>axisr   catrA   )rJ   	src_splitrO   r   rE   y       c                 S   s0   g | ]}t |tu r|d  t  n|qS )r   r;   rA   adr   r   r   rE      s
    c                 S   s    g | ]}|d   t  qS )r	   )r>   r   r?   rZ   r   r   r   rE      s     )parallel_dimc                    s(   g | ]}t j|    d dqS rG   )r   rH   r@   )rD   r1   rK   r   r   r   rE      s    c                    s(   g | ] t j fd dD ddqS )c                    rL   r   r   rM   rO   r   r   rE      rQ   rR   r   rS   rU   rW   )rX   rO   r   rE      rY   c                    s   g | ]
}|    qS r   r   r@   rC   r   r   rE      s    )named_parameterslenrH   r<   r=   r>   r   r?   r    
num_groupsdtyper   int8quantizeq_int8r   r+   r,   scalesetattrnumelviewr   r   rangerV   r!   r   confighidden_size)r0   r1   ptmp_datard   	src_shape	dst_shape	inner_dimr:   dim1weight_partitionall_data
bias_split)ckpt_mp_sizer_modulerankr   r   weight_quantizer)rD   rJ   r1   rK   rX   r   load_parametersF   s   & $



 zSload_model_with_checkpoint.<locals>.load_transformer_layer.<locals>.load_parametersr7   r   )named_childrenload_params)r0   r1   rx   rD   child)	rt   	ckpt_type	containerr4   ru   rv   r   r   rw   r   r   load_transformer_layerC   s   
hz:load_model_with_checkpoint.<locals>.load_transformer_layerr   llama)ColumnParallelLinearParallelEmbeddingRowParallelLinear)RMSNorm c           	         sd  |   D ]*\}}|jv r|| d  t fdd
d  D sRt|drQt|jdrQ|jjv rQ|jj }|jtju rQt	j
|jj d}t| || qt| }t|dkr|d  dksm|d jr|jjrv|jj}n|jj}|jtju rt|d |jj|jd	}t| || nt|jtjfv rt	j
|jj|jj|jd
}t| || nW|ju rt|d}t| || nF|jfv rt|d |jjt|dr|jn|jd	}t| || n$d }t|jdr|jj}t||jjd}|d ur|j|< t| || |j ||| d  q	||dkr%dkr%r%|n|| d |d  qd S )Nr7   c                 3   s    | ]} |v V  qd S )Nr   )rA   itemchecking_keyr   r   	<genexpr>   s    zLload_model_with_checkpoint.<locals>.load_module_recursive.<locals>.<genexpr>r   r#   ds_id)r#   r   )r:   r`   eps)weight_shaper`   r$   )r   r   )r   r`   ppr	   )ry   	__class__anyr   r(   r#   r   r   Linearr
   from_weightsre   r=   
parametersr^   rf   r*   r    ds_shape	LayerNormr   r`   r   r$   r   r   variance_epsilonr   )	r0   r1   levelnamer{   prefix1child_paramsr   r   )r   LlamaRMSNormOPTLearnedPositionalEmbeddingr   r   
all_ds_idsr|   layer_policiesload_module_recursiver   skip_level_0_prefixr   r   r      sf   

*






z9load_model_with_checkpoint.<locals>.load_module_recursive)r   r   )$policyuse_load_prefixtransformersmodelsoptmodeling_optr   r(   r   modeling_llamar   "fairscale.nn.model_parallel.layersr   r   r   llama.modelr   r   r   	Embeddingr   r   r
   r   transformer_inferenceDeepSpeedTransformerInferencer   r   r   r   r   r   r   r   r.   r/   )ru   r   r4   r|   rt   rw   rv   r}   r   r5   r~   r   r   sd_r   )r   r   r   r   r   r   rt   r|   r}   r3   r   r   r4   ru   rv   r   r   r   rw   r   load_model_with_checkpoint   s   q	
"3r   )Nr   N)r   r   5deepspeed.model_implementations.transformers.ds_bloomr   3deepspeed.model_implementations.transformers.ds_gptr   4deepspeed.model_implementations.transformers.ds_bertr   <deepspeed.model_implementations.transformers.ds_megatron_gptr   3deepspeed.model_implementations.transformers.ds_optr   6deepspeed.model_implementations.transformers.ds_llama2r   deepspeed.ops.transformeropstransformerr   layersr
   r   r   r   r   r.   deepspeed.acceleratorr   r   r   r   r   r   r   <module>   s"   