o
    Ti`                     @   s   d dl Z d dlmZ ddlmZ d dlmZ d dlZd dlmZ	 ddl
T d dlmZ dd	lmZ d d
lmZmZ d dlmZ d dlmZ dddZG dd dZG dd dZG dd dZdS )    N)nn   )replace_policies)Optional)comm)*)get_accelerator)require_tp_fused_qkvw)get_shard_sizeget_shard_size_list)groups)is_autotp_training_modeTc                 C   s"   | j r
tj| |dS | j||dS )Ndevice)copy)is_metatorch
empty_liketo)tensorr   r    r   S/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/module_inject/auto_tp.pymove   s   r   c                   @   sX   e Zd ZdddZdd Z		dd	eej d
eej dede	de	f
ddZ
dddZdS )ReplaceWithTensorSlicingNr   r   c                 C   s4   |d urt j|d| _nd| _|| _|| _|| _d S )N)groupr   )distget_rank	gpu_indexout_dimin_dimmp_size)selfmp_groupr    r   r   r   r   r   __init__"   s   
z!ReplaceWithTensorSlicing.__init__c                 C   s   ||ksJ dd S )NzMerging tensors is not allowed here! Please use deepspeed load_checkpoint            for merging your checkpoints before replacing the transformer layer with            inference-kernelsr   )r!   dim1dim2r   r   r   merge_assert+   s   
z%ReplaceWithTensorSlicing.merge_assertFdstsrc
num_splitsint8allocate_tensorc                    s  |d u r|S |j }|j }|rdnd |rt|}tj|j|j   |  d}t|dkrt|dkr|  || j krrz|dj|jd|j }W n   t	|j |j  t
  Y tjjj|dd}t|drp|j|_|S | |  || j  || j |  fdd	|D  fd
d	ttd D }	|dj|	| j  d|	| j j }n8|d |d krtjj|S |d | fdd	|D fdd	ttd D }
|j|
| j   tjjj|dd}t|dr|j|_|S )Nr   dim   Frequires_gradscalec                    s   g | ]
}t j| d qS )r-   r   split.0src_s)	outer_dimqkv_sizer   r   
<listcomp>O       z9ReplaceWithTensorSlicing.strided_copy.<locals>.<listcomp>c                    s(   g | ] t j fd dD dqS )c                       g | ]}|  qS r   r   r6   qkv_sir   r   r:   Q       DReplaceWithTensorSlicing.strided_copy.<locals>.<listcomp>.<listcomp>axisr   catr6   )r8   	qkv_splitr?   r   r:   P   s    c                    s   g | ]
}t j| d dqS )r   r-   r3   r5   )r9   r   r   r:   Y   r;   c                    s(   g | ] t j fd dD ddqS )c                    r<   r   r   r=   r?   r   r   r:   Z   rA   rB   r   rC   rE   rG   )rH   r?   r   r:   Z   s   ( )shaper   r   r4   datalenr   reshapecopy_printexitr   	parameter	Parameterhasattrr2   r&   ranger   
contiguous)r!   r'   r(   r)   r*   r+   	src_shape	dst_shape	src_splitweight_split
bias_splitr   )r8   r9   rH   r   strided_copy1   sL   
&
"
z%ReplaceWithTensorSlicing.strided_copyc           	      C   sP  |d u r|S |j jrJ |rt|}|rdnd}|rdnd}|j}|j}t|dkrt|dkr|| || j krU|| || j krU|dj 	|j d|j}n|| || j kr| 
|| || j  |j 	|dkr|d d | j|| j  | jd || j  f n|| j|| j  | jd || j  d d f  nq| 
|| || j  |j 	|dkr|d d | j|| j  | jd || j  f n|| j|| j  | jd || j  d d f  n-|d |d kr|j|jkr|n|j 	|}n|j 	|| j|d  | jd |d    tjjj|dd}t|dr&|j|_|S )Nr   r   r/   r,   Fr0   r2   )rJ   r   r   r   rI   rK   r   r   rL   rM   r&   r   dtyper   rP   rQ   rR   r2   )	r!   r'   r(   r*   r+   r8   	inner_dimrU   rV   r   r   r   r   b   s8   
$$>.>.,zReplaceWithTensorSlicing.copy)Nr   r   r   )FF)__name__
__module____qualname__r#   r&   r   r   TensorintboolrZ   r   r   r   r   r   r       s"    
	

1r   c                   @   s&   e Zd Zdd Zdd ZdddZdS )	Loadingc                 C   s.   t jt jt jg}g d}| j|v p|  |v S )N)LPLayerNormSharedEmbeddingOPTLearnedPositionalEmbeddingLlamaRMSNormFalconLinearMistralRMSNormT5LayerNormMixtralRMSNormPhi3RotaryEmbeddingPhi3SuScaledRotaryEmbeddingPhi3RMSNormYuanRMSNormYuanRotaryEmbedding!Phi3LongRoPEScaledRotaryEmbeddingQwen2RMSNormQwen3RMSNormQwen3MoeRMSNormDeepseekV2RMSNormDeepseekV3RMSNormDeepseekV2YarnRotaryEmbeddingDeepseekV3YarnRotaryEmbeddingMoEGate)r   Linear	Embedding	LayerNorm	__class__	_get_name)moduleload_layersload_layer_namesr   r   r   is_load_module   s   zLoading.is_load_modulec                 C   s   | j  D ]8}| j | jjr(tjjjtj| j | jdd| j | jj	d| j |< || | v r=| j | j
|||   qd S )Ncpur   rJ   r1   )_bufferskeysrJ   r   r   r   rP   rQ   r   r1   rM   )r   
state_dictprefixnamer   r   r   load_buffer   s   zLoading.load_bufferNc                 C   s  t |d}t| drE| jjjrDtjjjtj	| jjdd| jjj
d| _d|v r7|j| jj||d  dd| _nE|| jj||d  | _n7t| d	r|t| jdr|| jjjjrmtjjjtj	| jjjdd| jjjj
d| j_|| jjj||d  | j_|d
 | v rt| d
r| jjjrtjjjtj	| jjdd| jjj
d| _|| j||d
  | _d S t| d	rt| jd
r| jjjjrtjjjtj	| jjjdd| jjjj
d| j_|| jj||d
  | j_d S d S d S d S )Nr"   weightr   r   r   query_key_value   )r)   normbias)r   rR   r   rJ   r   r   r   rP   rQ   r   r1   rZ   r   r   r   r   )r   r   r   r"   
mp_replacer   r   r   load   sH   











 zLoading.loadN)r]   r^   r_   r   r   r   r   r   r   r   rc      s    	rc   c                   @   s   e Zd Z	d$ddZdd Zdd Zdd	 Zd
d Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zd%ddZdd  Zd!d" Zd#S )&AutoTPFc                 C   sJ   || _ || _|| _|| _d | _d | _|| _|| _d | _d| _	t
| d S )NF)r   all_reduce_linearsr   r   r    r"   linear_layer_settingorig_layer_impllinear_policiesconv_linear_layerTensorParallel_Layerset_keep_module_on_host)r!   r   r   r   r   r   r   keep_module_on_hostr   r   r   r#      s   zAutoTP.__init__c                 C   s(   |D ]}t |jt | jkr dS qdS NTF)typer]   )r   module_listitemr   r   r   in_module_list   s
   zAutoTP.in_module_listc                 C   sb   g }|   D ](}t|tjr'|  D ]}|s|g}qt||s%||g }qq|t| }q|S r   )children
isinstancer   
ModuleListr   r   get_module_list)modelmlistchildr   r   r   r   r      s   
zAutoTP.get_module_listc                 C   sn   g d}t | } td| }|d u rtd| }|d u r"td| }|d us*J d|d |v r5dS dS )	N)debertaflaubertfsmtgpt2led
longformerxlmxlnetz: (.*?)Modelz: (.*?)Stackz
(.*?)ModelHNot able to determine model policy automatically. Please provide policy.r   FT)strresearchmatchr   lower)r   unsupportedkeyr   r   r   	supported   s   zAutoTP.supportedc                 C   st   g }|j  D ]0\}}t|tjr|| d | g }qt|tjs)|dks)|dkr/|dg }q|t|| }q|S )N.r|   
layer_normln)_modulesitemsr   r   rz   r|   r   
get_layers)parentr   
layer_listr   	submoduler   r   r   r      s   zAutoTP.get_layersc                 C   sp   t | r+t| D ]"\}}|d t|kr*t||d  }tt||g| |< |   S q| tt||g | S )Nr   r   )rK   	enumerater   settupleappend)policy_list
new_modulenew_gemsr@   policyr   r   r   update_policy_list  s   zAutoTP.update_policy_listc                 C   sn   g }t D ]#}|d }t|jtr|jD ]}|| qq|jd ur'||j q| D ]
}|j|v r4 dS q*dS r   )r   r   _orig_layer_classlistr   r}   )r   r   plcy_orig_layer_classr   r   r   r   kernel_supported  s   


zAutoTP.kernel_supportedc                 C   sF  g }g }g }g }t | }t | sJ t |rddg d}|D ]}|j D ]*\}}t|tjr=|d| g }q+t|tj	sG||v rM|dg }q+|t 
|| }q+t|D ]\}	}
|
dkrt||	d  dkrs|||	d  g }qZd|
v r~||
g }qZd|
v r||
g }qZd	|
v r||
g }qZd
|
v rdt| v r||
g }qZd|
v rdtt|v r||
g }qZd|
v rdtt|v r||
g }qZd|
v rdtt|v r||
g }qZd|
v rdt| v r||
g }qZd|
v rdt| v r||
g }qZg }|g krtt|}t |||}g }q$t|s!J t |rdd|S )NzdAutoTP not supported for model. Please use kernel injection since container policy for model exists.z6AutoTP not supported for model. Please provide policy.)r|   r   ln_1ln_2r   r   r   out_projo_proj	down_projzattention.denseGPTNeoXzself_attention.densefalconw2Mixtralzself_attn.densePhiChatGLMdense_4h_to_hr   )r   r   r   r   r   r   r   r   rz   r|   r   r   r   r   r   r   r   rK   )r   r   r   r   gem_listnorm_layer_name_listr   r   r   r@   layerr   r   r   	tp_parser  sp   



zAutoTP.tp_parserc                 C   s.   t  rt | _t | _d S || _|| _d S r   )r   r   get_tensor_model_parallel_groupr"   $get_tensor_model_parallel_world_sizer    )r!   r    r"   r   r   r   set_tensor_parallel_configR  s   


z!AutoTP.set_tensor_parallel_configc                 C   s  t |dddkr
d S |jj}t| jd}d|ks5d|v s5d|v s5|dks5d	|ks,d|kr7d
tt| jv r7|S dt| jv rRd|v rHt|| jS d|v rRt	|| jS d|v sad|v rgdt| jv rgt
|| jS d}dt| jv rvd|v rvd}d}d|v r~d}|| jv s|s|rt|dd | jrt|| j|dS |dks|dkrt|| jS t|| j|dS t|dd | jrt|| j nt|| jrt|| j| jdS t|| j|dS )NreplacedFTr   zmlp.gateq_a_projkv_a_proj_with_mqazblock_sparse_moe.gatezmlp.shared_expert_gate	qwen2_moeYuanv_projr   gate_up_projdense_h_to_4hGLMArcticr   r   )r   lm_head	embed_out)fused_module)getattrr   rI   r   r"   r   r   r   Yuan_LinearLayerYuan_LinearAllreduceGateUpPack_LinearLayerr   setattrr   Conv_LinearALlreduceLmHeadLinearAllreduceLinearAllreduceconv_LinearLayerr	   r    fused_LinearLayerLinearLayer)r!   r   r   r   weight_shaper   arctic_w2_all_reduce_linearr   r   r   r   _replace\  sB    "zAutoTP._replacec                 C   s   t |dddkr
d S t| jd}t|jdr)|jjjjt|jj	d | j
dd}n|jjjt|jj	d | j
|dd}||j t  }tjjj|dd}t|jj	d	 t|jj	d | j
|}|jj| t|dd |S )
Nr   FTr   	ds_tensorr   r-   r0   r   )r   r   r"   rR   r   r   rJ   r4   r   rI   r    r   r   r   current_device_namer   r   rP   rQ   r{   r
   rM   r   )r!   r   r   r   r   rJ   new_embeddingr   r   r   _slice_embedding  s   &$&zAutoTP._slice_embeddingc                 C   s|   t |dddkr
d S g d}|D ]%}dt|v r!d|v r!|d t||r5t ||}t||t|| j qt|dd d S )Nr   FT)n_headsr\   	num_headsnum_kvnum_attention_headsnum_attn_headsall_head_size	embed_dimhidden_sizenum_key_value_headsnum_kv_heads
kv_n_headsd_model!num_attention_heads_per_partition$num_multi_query_groups_per_partitionhidden_size_per_partitionr   r
  )r   r   removerR   r   r
   r    )r!   r   
param_listparam	param_valr   r   r   update_mp_params  s   


zAutoTP.update_mp_paramsc                 C   s   d| _ | jd ur(| jd | ji| _t| jdkr&| j| jd | ji d S d S dd l}| j|j	j
jju rUzd| _ |jj| ji| _W d S  tyT   tj| ji| _Y d S w tj| jtj| ji| _d S )NFr   r/   r   T)r   r   r   r   rK   updater  transformersr   modelsr   modeling_gpt2	GPT2Blockpytorch_utilsConv1DImportErrorr   rz   r{   )r!   r  r   r   r   update_linear_policies  s   
zAutoTP.update_linear_policies c                    s  |  D ]\}|dkr|}n|dkr|}n|d | }|dkr-| jd | d | d n| jd | d  trX| jd urXt fdd| jD rWt| j | j nqtj	dkrl| jd urlt
| j  j| jv rt||| jj |d | | j qtfdd| jD rd }| jD ]}t|r|} nq|d usJ t||| j| |d | | j q|  | || q|S )Nr!  r   c                 3   s    | ]} |v V  qd S r   r   )r6   r   )checking_keyr   r   	<genexpr>  s    z)AutoTP._replace_module.<locals>.<genexpr>r   c                 3   s    | ]}t  |V  qd S r   )r   )r6   lp)r   r   r   r#    s    )named_childrenr   rc   r   r   anyr   r"   rK   r   r   r}   r   r   r   r   r  _replace_module)r!   r_module	prev_nameprev_class_namer   
class_namer   r$  r   )r"  r   r   r'    s>   4


zAutoTP._replace_modulec                 C   s<   d }g d}|D ]}t ||rt||}|d ur |S q|S )N)multi_query_group_numr  r  r  r  attention_heads)rR   r   )r!   configr  kv_head_namesr   r   r   r   get_model_num_kv_heads  s   

 zAutoTP.get_model_num_kv_headsc                 C   s^   t |drd}|j}nt |drd}|j}n|S |j| jv r-t||| j|j ||| j |S )Nr   r   )rR   r   r   r}   r   r   r   )r!   r(  r   r   r   r   r   _replace_last_linear_module  s   

z"AutoTP._replace_last_linear_moduleN)F)r!  r!  )r]   r^   r_   r#   r   r   r   r   r   r   r   r   r   r  r  r   r'  r0  r1  r   r   r   r   r      s$    	
5
3
#r   )T)r   r   r   replace_policyr   typingr   	deepspeedr   r   layersdeepspeed.acceleratorr   fusedqkv_utilsr	    deepspeed.module_inject.tp_shardr
   r   deepspeed.utilsr   deepspeed.module_inject.layersr   r   r   rc   r   r   r   r   r   <module>   s    

d=