o
    
۾i"                     @   s   d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZ d dlmZmZ d dlmZ ddlmZ ddlm Z m!Z!m"Z" e
e#Z$eG dd dej%Z&G dd deZ'dS )    )IterableN)support_torch_compile)
VllmConfig)init_logger)RMSNorm)LogitsProcessor)QuantizationConfig)TorchAOConfig)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)Llama4DecoderLayerLlama4ForCausalLM)extract_layer_index   )SupportsMultiModal)AutoWeightsLoadermaybe_prefixprocess_eagle_weightc                       s   e Zd ZdddddededededB d	df
 fd
dZdej	d	ej	fddZ
	ddej	dB dej	dej	dej	dB d	eej	ej	f f
ddZdeeeej	f  d	ee fddZ	ddededB d	dfddZ  ZS )
LlamaModel r   N)prefixstart_layer_idquant_configvllm_configr   r   r   returnc                   s   t    jjj_| jj_tjjjj	t
 dd_j}|_zt fddtjjD _W |_n|_w tjjjj	d jj	dd_tjj	jjd_d S )	Nembed_tokensr   c              	      s,   g | ]}t t d |  jdqS )zlayers.)r   r   config)r   r   r   ).0ir   selfr   r    [/home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/llama4_eagle.py
<listcomp>F   s    z'LlamaModel.__init__.<locals>.<listcomp>   F)bias)eps)super__init__speculative_configdraft_model_config	hf_configr   validate_and_update_config
vocab_sizer   hidden_sizer   r   r   nn
ModuleListrangenum_hidden_layerslayerstorchLinearfcr   rms_norm_epsnorm)r"   r   r   r   r   original_quant_config	__class__r!   r$   r*   /   s,   


zLlamaModel.__init__	input_idsc                 C   s
   |  |S N)r   )r"   r>   r#   r#   r$   embed_input_idsW   s   
zLlamaModel.embed_input_ids	positionshidden_statesinputs_embedsc                 C   sb   |d u r	|  |}| tj||fdd}d }| jD ]
}||||\}}q| ||\}}||fS )N)dim)r@   r8   r6   catr5   r:   )r"   r>   rA   rB   rC   residuallayer_r#   r#   r$   forwardZ   s   


zLlamaModel.forwardweightsc                 C   s   g d}t |  }t }|D ]>\}}|d}|D ]\}}}	||vr$q|||}|| }
|
j}||
||	  n|| }
t|
dt}||
| || q|D ]}||v s]J | dqP|S )N))	.qkv_projz.q_projq)rL   z.k_projk)rL   z.v_projv).gate_up_projz
.gate_projr   )rP   z.up_projr   model.weight_loaderz is not loaded!)	dictnamed_parameterssetremoveprefixreplacerR   getattrr   add)r"   rK   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
param_nameweight_nameshard_idparamrR   r#   r#   r$   load_weightsn   s(   

zLlamaModel.load_weightsc                    s   | j jd u sJ | j jd u sJ t| j jdksJ dg | j j | j _t|trGdtdtffdd |j	} fdd|j
 D |_
d S d S )Nr   rH   r   c                    s    t | }| t|t|  S r?   )r   rW   str)rH   layer_index)r   r#   r$   pad_layer_name   s   z=LlamaModel.validate_and_update_config.<locals>.pad_layer_namec                    s   i | ]	\}} ||qS r#   r#   )r   rH   quantization)rf   r#   r$   
<dictcomp>   s    z9LlamaModel.validate_and_update_config.<locals>.<dictcomp>)r   yoco_global_kv_layeryoco_local_kv_layerlen
moe_layersno_rope_layers
isinstancer	   rd   torchao_configmodule_fqn_to_configitems)r"   r   r   ro   r#   )rf   r   r$   r.      s   

z%LlamaModel.validate_and_update_configr?   )__name__
__module____qualname__r   rd   intr   r*   r6   Tensorr@   tuplerJ   r   rU   rc   r.   __classcell__r#   r#   r<   r$   r   -   sJ    (
$r   c                   @   s   e Zd ZdddedefddZdejjfdd	Z	e
jZ	
ddejdejdejdejd
B deejejf f
ddZdeeeejf  dd
fddZd
S )EagleLlama4ForCausalLMr   r   r   r   c                C   s   t j|  |jjj| _|j|j	}t
|jj|j}t|d||d| _t| jdd}t| jj|d| _t| jj| jjt|dd| _|   d S )Nmodel)r   r   r   r   logit_scaleg      ?)scalelm_headr   )r1   Moduler*   r+   r,   r-   r   model_configget_num_layersparallel_configr   get_quantization_configload_configr   rz   rX   r   r/   logits_processorr
   draft_vocab_sizer0   r   r}   set_moe_parameters)r"   r   r   target_layer_numr   r{   r#   r#   r$   r*      s0   
zEagleLlama4ForCausalLM.__init__r   c                 C   s   | j S r?   rz   r"   r#   r#   r$   get_language_model   s   z)EagleLlama4ForCausalLM.get_language_modelNr>   rA   rB   rC   c                 C   s   |  ||||S r?   r   )r"   r>   rA   rB   rC   r#   r#   r$   rJ      s   zEagleLlama4ForCausalLM.forwardrK   c                    s,    fdd}t  g d}|t|| d S )Nc                    s:   | \}}  ||\}}d|vrd| }t | ||fS )Nr}   rQ   )permute_qk_weight_for_rotaryr   )inputsr]   r^   weightr   r#   r$   	transform   s   
z6EagleLlama4ForCausalLM.load_weights.<locals>.transform)skip_prefixes)r   rc   map)r"   rK   r   loaderr#   r   r$   rc      s   z#EagleLlama4ForCausalLM.load_weightsr?   )rr   rs   rt   r   rd   r*   r6   r1   r~   r   r   r@   rv   rw   rJ   r   rc   r#   r#   r#   r$   ry      s"    
$	ry   )(collections.abcr   r6   torch.nnr1   vllm.compilation.decoratorsr   vllm.configr   vllm.loggerr   $vllm.model_executor.layers.layernormr   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   /vllm.model_executor.layers.quantization.torchaor	   3vllm.model_executor.layers.vocab_parallel_embeddingr
   r   -vllm.model_executor.model_loader.weight_utilsr   !vllm.model_executor.models.llama4r   r    vllm.model_executor.models.utilsr   
interfacesr   utilsr   r   r   rr   loggerr~   r   ry   r#   r#   r#   r$   <module>   s(   x