o
    Ti                     @   s   d dl T d dlmZmZmZ ddlmZmZ ddlm	Z	 ddl
Z
ddlmZ dd	lmZmZmZmZmZmZ G d
d deeeeZG dd deZdS )   )*)HybridSplitQKVContainerHybridGatedMLPContainerMetaTensorContainer    )ActivationFuncTypeNormType)DeepSpeedLlama2InferenceN)	Parameter   )TransformerPolicytransformer_param_names
maybe_copymaybe_copy_qkvmaybe_copy_geglumaybe_get_lorac                       sN   e Zd Z fddZdddZdd Zdd	 Zd
d Zdd Zdd Z	  Z
S )DS_LLAMA2Containerc                    s   t  jdi | d S )N )super__init__)selfkwargs	__class__r   ]/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/module_inject/containers/llama2.pyr      s   zDS_LLAMA2Container.__init__Nc                 C   sP   |d ur|n| j }d|_d|_| j| j |_| jjjj	|_
t|| jd| _| jS )NFT)mp_group)ds_model_configrotate_halfrotate_every_twohidden_sizenum_attention_heads
rotary_dimpolicyclient_module	attention
n_kv_headsnum_kvr	   r   module)r   config_configr   r   r   create_module   s   z DS_LLAMA2Container.create_modulec                 C   sh   dd | j jjjj| j jjjj| j jjjj| j jjjj| j jjj	j| j jjj
j| j jjjjfD | _dS )zD
        Necessary to implement for `HybridEngineContainer`
        c                 S   s   g | ]}t |qS r   )r   ).0pr   r   r   
<listcomp>.   s    z6DS_LLAMA2Container.set_lora_params.<locals>.<listcomp>N)r"   r#   feed_forwardw3weightw1w2r$   wqwkwvwolora_paramsr   r   r   r   set_lora_params*   s   z"DS_LLAMA2Container.set_lora_paramsc           	      C   sV   |   \}}}}}}}|| jf|| jf|| jf|| jf|| jf|| jf|| jfg}|S N)get_lora_params
inter_up_winter_gate_w_4hh_wdense_wqwkwvw)	r   up_proj_loragate_proj_loradown_proj_loraq_lorak_lorav_loraout_loraretr   r   r   get_lora_matched_pair7   s
    z(DS_LLAMA2Container.get_lora_matched_pairc                 C   sF   | j jjjj| _d| _| j jjjj| _d| _	| j jjj
j| _d| _dS )zF
        Necessary to implement for `HybridSplitQKVContainer`
        N)r"   r#   r$   r3   r0   r@   qbr4   rA   kbr5   rB   vbr8   r   r   r   	set_q_k_v=   s   
zDS_LLAMA2Container.set_q_k_vc                 C   s0   | j jjjj| _d| _| j jjjj| _d| _	dS )zF
        Necessary to implement for `HybridGatedMLPContainer`
        N)
r"   r#   r.   r2   r0   r<   
inter_up_br1   r=   inter_gate_br8   r   r   r   set_mlp_gateH   s   
zDS_LLAMA2Container.set_mlp_gatec                 C   s   d}t |j|||d||d  ||d  ||d  g| jjd tddD ]}t|j|||t|d  |||   q$t|j|||d	||d  ||d
  g t|j|||d||d   t|j|||td ||d   t||||td ||d   d S )N)	zattention.wq.weightzattention.wk.weightzattention.wv.weightzattention.wo.weightzfeed_forward.w3.weightzfeed_forward.w1.weightzfeed_forward.w2.weightzffn_norm.weightzattention_norm.weight	attn_qkvwr   r   r   )	split_qkv      inter_w   output_w         
   )	r   r$   r"   rT   ranger   r   r   mlp)r   r'   sdweight_quantizer
mp_replaceprefixparam_namesir   r   r   load_paramsQ   s$   "
 "zDS_LLAMA2Container.load_paramsr:   )__name__
__module____qualname__r   r*   r9   rK   rO   rR   rf   __classcell__r   r   r   r   r      s    
	r   c                       sB   e Zd Zd fdd	Zdd ZdddZdd	d
Zdd Z  ZS )LLAMA2LayerPolicyTc                    sH   t  j|tjtjd || _zdd l}|jj	t
_W d S    d t
_Y d S )N)mlp_act_func_type	norm_typer   )r   r   r   
GATED_SILUr   RMSNormr#   llamamodelTransformerBlockrk   _orig_layer_class)r   r#   	inferencerp   r   r   r   r   q   s   zLLAMA2LayerPolicy.__init__c                 C   sL   | j jjjjd | j j| j jjtj	
 r#| j jjjjd tj	  fS dfS )Nr   r   )r#   r$   r3   r0   shapen_headsffn_normeps	deepspeedcommis_initializedr.   r1   get_world_sizer8   r   r   r   get_hidden_heads~   s   z"LLAMA2LayerPolicy.get_hidden_headsFc                 C   sT   | j jjj}| j jjj}| j jjj}ttj|||fdd|d}|d | j jj	jd fS Nr   )dim)requires_grad)
r#   r$   r3   r0   r4   r5   r
   torchcatr6   )r   enable_trainingr@   rA   rB   qkvwr   r   r   r$      s   
zLLAMA2LayerPolicy.attentionc                 C   sJ   | j jjj}| j jjj}| j jjj}ttj||fdd|d}|d |d fS r~   )	r#   r.   r/   r0   r1   r2   r
   r   r   )r   r   mlp1_up	mlp1_gatemlp2mlp1r   r   r   r_      s
   zLLAMA2LayerPolicy.mlpc                 C   s   | j jjd | j jjd fS r:   )r#   rw   r0   attention_normr8   r   r   r   	layernorm   s
   zLLAMA2LayerPolicy.layernorm)T)F)	rg   rh   ri   r   r}   r$   r_   r   rj   r   r   r   r   rk   o   s    

	rk   )basefeaturesr   r   r   deepspeed.utils.typesr   r   6deepspeed.model_implementations.transformers.ds_llama2r	   r   torch.nn.parameterr
   r"   r   r   r   r   r   r   BaseTransformerContainerr   rk   r   r   r   r   <module>   s    
X