o
    پi%                     @   s  d Z ddlmZ 	 ddlZddlmZmZmZ ddlZddlm	Z	 ddl
mZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZ ddlmZmZ ddlmZ ddlmZm Z m!Z! G dd deZG dd de	j"Z#G dd de Z$e$gZ%dS )a-  
Copyright 2023-2024 SGLang Team
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
    )
add_prefixN)IterableOptionalTuple)nn)LlamaConfig)get_pp_group)RMSNorm)QKVParallelLinear)LogitsProcessor)QuantizationConfig)ParallelLMHeadVocabParallelEmbedding)ForwardBatchPPProxyTensors)default_weight_loader)LlamaDecoderLayerLlamaForCausalLMLlamaMLPc                       sx   e Zd Z			ddededee deddf
 fd	d
Zde	j
de	j
de	j
dedee	j
 dee	j
e	j
f fddZ  ZS )r   r   N configlayer_idquant_configprefixreturnc              
      s   t  |||| td| j | jj| jj| jjd|td|d| j_	|j
dkr+|j}n|j}t|j||j||| _t|j|jd| _d S )N   Fqkv_proj)biasr   r   llama4_texteps)super__init__r
   hidden_size	self_attnhead_dimtotal_num_headstotal_num_kv_headsr   r   
model_typeintermediate_size_mlpintermediate_sizer   
hidden_actmlpr	   rms_norm_epshidden_norm)selfr   r   r   r   
inter_size	__class__ R/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/llama_eagle3.pyr"   ,   s"   


zLlamaDecoderLayer.__init__	positionsembedshidden_statesforward_batchresidualc                 C   s\   |}|  |}| |}tj||gdd}| j|||d}| ||\}}| |}||fS )N)dim)r5   r7   r8   )input_layernormr.   torchcatr$   post_attention_layernormr,   )r/   r5   r6   r7   r8   r9   r3   r3   r4   forwardK   s   	


zLlamaDecoderLayer.forward)r   Nr   )__name__
__module____qualname__r   intr   r   strr"   r=   Tensorr   r   r@   __classcell__r3   r3   r1   r4   r   +   s8    r   c                       sl   e Zd Z		ddedee deddf fddZ		dd	ej	d
ej	de
dej	dee dej	fddZ  ZS )
LlamaModelNr   r   r   r   r   c                    s   t    || _t|do|jd uod|jv | _| jr d|jd< |j| _t|j|jt	d|d| _
t|dr;|j| _n|j| _tjj| jd |jt|d	d
d| _t|d||| _t|j|jd| _d S )Nrope_scalingmrope_sectiondefault	rope_typeembed_tokens)r   target_hidden_size   r   F)r   r   r   )r!   r"   r   hasattrrI   is_mrope_enabled
vocab_sizer   r#   r   rM   rN   hidden_size_inr=   r   Lineargetattrfcr   midlayerr	   r-   norm)r/   r   r   r   r1   r3   r4   r"   i   s2   





zLlamaModel.__init__	input_idsr5   r8   input_embedspp_proxy_tensorsc                 C   s   |d u r
|  |}n|}| jr|j}|jj}|jd |jd kr%| |}|jd dkr1||gfS d }| |||||\}}| ||\}	}
|	|
gfS )Nr:   r   )	rM   rQ   mrope_positions	spec_infor7   shaperV   rW   rX   )r/   rY   r5   r8   rZ   r[   r6   r7   r9   hidden_states_to_logitshidden_states_to_auxr3   r3   r4   r@      s,   


zLlamaModel.forwardNr   )NN)rA   rB   rC   r   r   r   rE   r"   r=   rF   r   r   r@   rG   r3   r3   r1   r4   rH   h   s6    -rH   c                	   @   sX   e Zd Z		ddedee deddfddZd	ee	ee
jf  ddfd
dZdd ZdS )LlamaForCausalLMEagle3Nr   r   r   r   r   c                 C   s   t j|  || _|| _t | _| jjdkrtdt	||t
d|d| _d| _| jjr2| jj| _n|jd u r>d| _|j|_t|j|j|t
d|d| _t|}|j|_t|| _d| _d | _d S )N   z&EAGLE3 currently only supports 1 layermodel)r   r   FTlm_head)r   Moduler"   r   r   r   pp_groupnum_hidden_layers
ValueErrorrH   r   rd   load_lm_head_from_targettie_word_embeddingsrM   re   draft_vocab_sizerR   r   r#   copydeepcopyr   logits_processorcapture_aux_hidden_stateshot_token_id)r/   r   r   r   config_r3   r3   r4   r"      s6   



zLlamaForCausalLMEagle3.__init__weightsc                 C   s   t |  }g d}|D ]j\}}d|v r |t|jd  | _qd|v r%q|D ]0\}}}||vr1q'|||}||vr@d| n|}||v rV|| }	t|	dt}
|
|	||  n||v r^|nd| }||v rv|| }	t|	dt}
|
|	| qd S )N))	.qkv_projz.q_projq)rt   z.k_projk)rt   z.v_projv).gate_up_projz
.gate_projr   )rx   z.up_projrc   d2tr   t2dzmodel.weight_loader)	dictnamed_parametersr=   aranger^   rq   replacerU   r   )r/   rs   params_dictstacked_params_mappingnameloaded_weight
param_nameweight_nameshard_idparamr{   r3   r3   r4   load_weights   s:   	
z#LlamaForCausalLMEagle3.load_weightsc                 C   s   | j S )N)rq   )r/   r3   r3   r4   get_hot_token_id  s   z'LlamaForCausalLMEagle3.get_hot_token_idra   )rA   rB   rC   r   r   r   rE   r"   r   r   r=   rF   r   r   r3   r3   r3   r4   rb      s    
 *+rb   )&__doc__sglang.srt.utilsr   rm   typingr   r   r   r=   r   transformersr   sglang.srt.distributedr   sglang.srt.layers.layernormr	   sglang.srt.layers.linearr
   "sglang.srt.layers.logits_processorr   *sglang.srt.layers.quantization.base_configr   *sglang.srt.layers.vocab_parallel_embeddingr   r   ,sglang.srt.model_executor.forward_batch_infor   r   $sglang.srt.model_loader.weight_utilsr   sglang.srt.models.llamar   r   r   rf   rH   rb   
EntryClassr3   r3   r3   r4   <module>   s*    =R
Z