o
    TiQ                     @   s   d dl Z d dlZd dlmZ d dlmZ d dlmZmZ ddl	m
Z
mZmZmZmZmZ ddlT dd	lmZmZ G d
d deeeZG dd de
ZdS )    N)	Parameter)DeepSpeedGPTInference)ActivationFuncTypeNormType   )TransformerPolicy
maybe_copymaybe_copy_geglumaybe_copy_qkvmaybe_get_loratransformer_param_names   )*)HybridGatedMLPContainerHybridSplitQKVContainerc                       sN   e Zd Z fddZdddZdd Zdd	 Zd
d Zdd Zdd Z	  Z
S )DS_InternLMContainerc                    s   t  jdi | d S )N )super__init__)selfkwargs	__class__r   _/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/module_inject/containers/internlm.pyr      s   zDS_InternLMContainer.__init__Nc                 C   sB   |d ur|n| j }d|_d|_| j| j |_t|| jd| _| jS )NTF)mp_group)	ds_model_configrotate_halfrotate_every_twohidden_sizenum_attention_heads
rotary_dimr   r   module)r   config_configr   r   r   create_module   s   z"DS_InternLMContainer.create_modulec                 C   sh   dd | j jjjj| j jjjj| j jjjj| j jjjj| j jjj	j| j jjj
j| j jjjjfD | _dS )zD
        Necessary to implement for `HybridEngineContainer`
        c                 S   s   g | ]}t |qS r   )r   ).0pr   r   r   
<listcomp>)   s    z8DS_InternLMContainer.set_lora_params.<locals>.<listcomp>N)policyclient_modulemlpup_projweight	gate_proj	down_proj	self_attnq_projk_projv_projo_projlora_paramsr   r   r   r   set_lora_params%   s   z$DS_InternLMContainer.set_lora_paramsc           	      C   sV   |   \}}}}}}}|| jf|| jf|| jf|| jf|| jf|| jf|| jfg}|S N)get_lora_params
inter_up_winter_gate_w_4hh_wdense_wqwkwvw)	r   up_proj_loragate_proj_loradown_proj_loraq_lorak_lorav_loraout_loraretr   r   r   get_lora_matched_pair2   s
    z*DS_InternLMContainer.get_lora_matched_pairc                 C   sd   | j jjjj| _| j jjjj| _| j jjjj| _	| j jjjj| _
| j jjjj| _| j jjjj| _dS )zF
        Necessary to implement for `HybridSplitQKVContainer`
        N)r(   r)   r/   r0   r,   r=   biasqbr1   r>   kbr2   r?   vbr5   r   r   r   	set_q_k_v8   s   zDS_InternLMContainer.set_q_k_vc                 C   s0   | j jjjj| _d| _| j jjjj| _d| _	dS )zF
        Necessary to implement for `HybridGatedMLPContainer`
        N)
r(   r)   r*   r+   r,   r9   
inter_up_br-   r:   inter_gate_br5   r   r   r   set_mlp_gateC   s   
z!DS_InternLMContainer.set_mlp_gatec                 C   s>  d}t |j|||d||d  ||d  ||d  g| jjd t |j|||d||d  ||d	  ||d
  g| jjd t|j|||td ||d   t|j|||td ||d   t|j|||d||d  ||d  g t|j|||d||d   t|j|||td ||d   t||||td	 ||d   d S )N)zself_attn.q_proj.weightzself_attn.k_proj.weightzself_attn.v_proj.weightzself_attn.o_proj.weightzmlp.up_proj.weightzmlp.gate_proj.weightzmlp.down_proj.weightzinput_layernorm.weightz4post_attention_layernorm.weightself_attn.q_proj.biaszself_attn.k_proj.biaszself_attn.v_proj.biaszself_attn.o_proj.bias	attn_qkvwr   r   r   )	split_qkv	attn_qkvb	   
            inter_w      output_w         )r
   	attentionr(   rR   r   r   r	   r*   )r   r!   sdweight_quantizer
mp_replaceprefixparam_namesr   r   r   load_paramsL   s6   ""

 "z DS_InternLMContainer.load_paramsr7   )__name__
__module____qualname__r   r$   r6   rH   rM   rP   rf   __classcell__r   r   r   r   r      s    

	r   c                       sR   e Zd Zg ZdZd fdd	Zdd Zdd Zdd	d
ZdddZ	dd Z
  ZS )InternLMLayerPolicyFTc                    s(   t  j|tjtjd || _|   d S )N)mlp_act_func_type	norm_type)r   r   r   
GATED_SILUr   RMSNormr)   _init_orig_layer_class_once)r   r)   	inferencer   r   r   r   y   s   zInternLMLayerPolicy.__init__c              	   C   sn   t jrd S dD ]*}zddlm} t| | d}|jt jvr't j|j W q t	y1   Y qw dt _d S )N) z.internlm-7bz.internlm-chat-7br   ) TRANSFORMERS_DYNAMIC_MODULE_NAMEz.modeling_internlmT)
rk   _orig_layer_class_initedtransformers.utilsrs   	importlibimport_moduleInternLMDecoderLayer_orig_layer_classappendImportError)r   sub_pkgrs   r!   r   r   r   rp      s   
z/InternLMLayerPolicy._init_orig_layer_class_oncec                 C   s4   | j jjjjd | j jj| j jj| j jj	jjd fS )Nr   r   )
r)   r/   r0   r,   shape	num_headsinput_layernormvariance_epsilonr*   r-   r5   r   r   r   get_hidden_heads   s
   z$InternLMLayerPolicy.get_hidden_headsc           
      C   s   | j jjj}| j jjj}| j jjj}| j jjj}| j jjj}| j jjj}ttj	|||fdd|d}ttj	|||fdd|d}	||	| j jj
j| j jj
jfS Nr   )dim)requires_grad)r)   r/   r0   r,   r1   r2   rI   r   torchcatr3   )
r   enable_trainingr=   r>   r?   rJ   rK   rL   qkvwqkvbr   r   r   r`      s   

zInternLMLayerPolicy.attentionc                 C   sJ   | j jjj}| j jjj}| j jjj}ttj||fdd|d}|d |d fS r   )	r)   r*   r+   r,   r-   r.   r   r   r   )r   r   mlp1_up	mlp1_gatemlp2mlp1r   r   r   r*      s
   zInternLMLayerPolicy.mlpc                 C   s   | j jjd | j jjd fS r7   )r)   post_attention_layernormr,   r   r5   r   r   r   	layernorm   s
   zInternLMLayerPolicy.layernorm)T)F)rg   rh   ri   ry   rt   r   rp   r   r`   r*   r   rj   r   r   r   r   rk   u   s    


	rk   )rv   r   torch.nn.parameterr   3deepspeed.model_implementations.transformers.ds_gptr   deepspeed.utils.typesr   r   r(   r   r   r	   r
   r   r   basefeaturesr   r   BaseTransformerContainerr   rk   r   r   r   r   <module>   s    a