o
    Ti                     @   s   d dl T d dlmZmZmZ ddlmZmZ ddlm	Z	 ddl
Z
ddlmZ dd	lmZmZmZmZmZmZ G d
d deeeeZG dd deZdS )   )*)HybridSplitQKVContainerHybridGatedMLPContainerMetaTensorContainer    )ActivationFuncTypeNormType)DeepSpeedGPTInferenceN)	Parameter   )TransformerPolicytransformer_param_names
maybe_copymaybe_copy_qkvmaybe_copy_geglumaybe_get_lorac                       sN   e Zd Z fddZdddZdd Zdd	 Zd
d Zdd Zdd Z	  Z
S )DS_LLAMAContainerc                    s   t  jdi | d S )N )super__init__)selfkwargs	__class__r   \/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/module_inject/containers/llama.pyr      s   zDS_LLAMAContainer.__init__Nc                 C   sP   |d ur|n| j }d|_d|_| j| j |_| jjjj	|_	t
|| jd| _| jS )NTF)mp_group)ds_model_configrotate_halfrotate_every_twohidden_sizenum_attention_heads
rotary_dimpolicyclient_module	self_attn
rope_thetar	   r   module)r   config_configr   r   r   create_module   s   zDS_LLAMAContainer.create_modulec                 C   sh   dd | j jjjj| j jjjj| j jjjj| j jjjj| j jjj	j| j jjj
j| j jjjjfD | _dS )zD
        Necessary to implement for `HybridEngineContainer`
        c                 S   s   g | ]}t |qS r   )r   ).0pr   r   r   
<listcomp>.   s    z5DS_LLAMAContainer.set_lora_params.<locals>.<listcomp>N)r"   r#   mlpup_projweight	gate_proj	down_projr$   q_projk_projv_projo_projlora_paramsr   r   r   r   set_lora_params*   s   z!DS_LLAMAContainer.set_lora_paramsc           	      C   sV   |   \}}}}}}}|| jf|| jf|| jf|| jf|| jf|| jf|| jfg}|S N)get_lora_params
inter_up_winter_gate_w_4hh_wdense_wqwkwvw)	r   up_proj_loragate_proj_loradown_proj_loraq_lorak_lorav_loraout_loraretr   r   r   get_lora_matched_pair7   s
    z'DS_LLAMAContainer.get_lora_matched_pairc                 C   sF   | j jjjj| _d| _| j jjjj| _d| _	| j jjj
j| _d| _dS )zF
        Necessary to implement for `HybridSplitQKVContainer`
        N)r"   r#   r$   r2   r/   r?   qbr3   r@   kbr4   rA   vbr7   r   r   r   	set_q_k_v=   s   
zDS_LLAMAContainer.set_q_k_vc                 C   s0   | j jjjj| _d| _| j jjjj| _d| _	dS )zF
        Necessary to implement for `HybridGatedMLPContainer`
        N)
r"   r#   r-   r.   r/   r;   
inter_up_br0   r<   inter_gate_br7   r   r   r   set_mlp_gateH   s   
zDS_LLAMAContainer.set_mlp_gatec                 C   s   d}t |j|||d||d  ||d  ||d  g| jjd tddD ]}t|j|||t|d  |||   q$t|j|||d	||d  ||d
  g t|j|||d||d   t|j|||td ||d   t||||td ||d   d |j_	d S )N)	zself_attn.q_proj.weightzself_attn.k_proj.weightzself_attn.v_proj.weightzself_attn.o_proj.weightzmlp.up_proj.weightzmlp.gate_proj.weightzmlp.down_proj.weightzpost_attention_layernorm.weightzinput_layernorm.weight	attn_qkvwr   r   r   )	split_qkv      inter_w   output_w         
   )
r   	attentionr"   rS   ranger   r   r   r-   output_b)r   r&   sdweight_quantizer
mp_replaceprefixparam_namesir   r   r   load_paramsQ   s&   "
 zDS_LLAMAContainer.load_paramsr9   )__name__
__module____qualname__r   r)   r8   rJ   rN   rQ   rf   __classcell__r   r   r   r   r      s    
	r   c                       sB   e Zd Zd fdd	Zdd ZdddZdd	d
Zdd Z  ZS )LLAMALayerPolicyTc                    sL   t  j|tjtjd || _zdd l}|jj	j
jt_W d S    d t_Y d S )N)mlp_act_func_type	norm_typer   )r   r   r   
GATED_SILUr   RMSNormr#   transformersmodelsllamamodeling_llamaLlamaDecoderLayerrk   _orig_layer_class)r   r#   	inferencerp   r   r   r   r   u   s   zLLAMALayerPolicy.__init__c                 C   s,   | j jjj| j jj| j jj| j jjj	f}|S r9   )
r#   r$   r2   in_features	num_headsinput_layernormvariance_epsilonr-   r0   out_features)r   hidden_headsr   r   r   get_hidden_heads   s   

z!LLAMALayerPolicy.get_hidden_headsFc                 C   sT   | j jjj}| j jjj}| j jjj}ttj|||fdd|d}|d | j jj	jd fS Nr   )dim)requires_grad)
r#   r$   r2   r/   r3   r4   r
   torchcatr5   )r   enable_trainingr?   r@   rA   qkvwr   r   r   r]      s   
zLLAMALayerPolicy.attentionc                 C   sJ   | j jjj}| j jjj}| j jjj}ttj||fdd|d}|d |d fS r~   )	r#   r-   r.   r/   r0   r1   r
   r   r   )r   r   mlp1_up	mlp1_gatemlp2mlp1r   r   r   r-      s
   zLLAMALayerPolicy.mlpc                 C   s   | j jjd | j jjd fS r9   )r#   post_attention_layernormr/   ry   r7   r   r   r   	layernorm   s
   zLLAMALayerPolicy.layernorm)T)F)	rg   rh   ri   r   r}   r]   r-   r   rj   r   r   r   r   rk   s   s    
	
	rk   )basefeaturesr   r   r   deepspeed.utils.typesr   r   3deepspeed.model_implementations.transformers.ds_gptr	   r   torch.nn.parameterr
   r"   r   r   r   r   r   r   BaseTransformerContainerr   rk   r   r   r   r   <module>   s    
\