o
    
۾i$                     @   s   d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZmZ d d	lmZmZ d d
lmZmZ ddlmZmZmZ eG dd dejZG dd deZdS )    )IterableN)support_torch_compile)
VllmConfig)FusedMoE)RMSNorm)LogitsProcessor)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)DeepseekV2DecoderLayerDeepseekV3ForCausalLM   )AutoWeightsLoadermaybe_prefixprocess_eagle_weightc                
       s   e Zd Zddddedededdf fd	d
ZdejdejfddZ	dejdejdejde
ejejf fddZdee
eejf  dee fddZ  ZS )DeepseekV2Model r   )prefixstart_layer_idvllm_configr   r   returnNc                   s   t    jjj_j}jj_tjjjj	|t
 dd_t fddtjjD _tjjjj	d jjj	dd_tjj	jjd_tjj	jjd_tjj	jjd_d S )	Nembed_tokensquant_configr   c              	      s,   g | ]}t t d |  jdqS )zlayers.)r   config)r   r   r   ).0ir   selfr   r    ]/home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/deepseek_eagle.py
<listcomp>4   s    z,DeepseekV2Model.__init__.<locals>.<listcomp>   F)bias)eps)super__init__speculative_configdraft_model_config	hf_configr   r   
vocab_sizer	   hidden_sizer   r   nn
ModuleListrangenum_hidden_layerslayersLinearmodelfcr   rms_norm_epsenormhnormnorm)r   r   r   r   r   	__class__r   r!   r'       s.   


zDeepseekV2Model.__init__	input_idsc                 C   s
   |  |S N)r   r   r;   r    r    r!   embed_input_idsH   s   
zDeepseekV2Model.embed_input_ids	positionshidden_statesc           	      C   sj   |  |}tj| || |gdd}| |}d }| jD ]
}||||\}}q| ||\}}||fS )N)dim)r   torchcatr6   r7   r4   r1   r8   )	r   r;   r?   r@   input_embedsinputsresiduallayer_r    r    r!   forwardK   s   



zDeepseekV2Model.forwardweightsc              	   C   sj  g d}t j| ddd| jjd}t|  }t }|D ]\}}d|v r$q|D ]<\}}	}
|	|vr0q&d|v r9||vr9q&||	|}|dkrH||vrHq&|}|d	rT||vrTq&|| }|j	}||||
  nJ|D ]$}|\}}	}}
|	|vrrqe||	|}|| }|j	}|||||
|d
  n#|d	r||vrqt
||}|d u rq|| }t|dt}||| || q|S )N))gate_up_proj	gate_projr   )rL   up_projr   )fused_qkv_a_projq_a_projr   )rO   kv_a_proj_with_mqar   rM   	down_projrN   )ckpt_gate_proj_nameckpt_down_proj_nameckpt_up_proj_namenum_expertszrotary_emb.inv_freqzmlp.experts.rO   z.bias)shard_id	expert_idweight_loader)r   make_expert_params_mappingr   n_routed_expertsdictnamed_parameterssetreplaceendswithrY   r   getattrr
   add)r   rK   stacked_params_mappingexpert_params_mappingparams_dictloaded_paramsnameloaded_weight
param_nameweight_namerW   name_mappedparamrY   mappingrX   r    r    r!   load_weightsa   sp   


zDeepseekV2Model.load_weights)__name__
__module____qualname__r   strintr'   rC   Tensorr>   tuplerJ   r   r^   rn   __classcell__r    r    r9   r!   r      s.    (
,r   c                   @   s   e Zd ZdddedefddZdejdejfd	d
Z	ddejdejdejdejdB de	ejejf f
ddZ
dejdejdB fddZdee	eejf  fddZdS )EagleDeepseekV3ForCausalLMr   )r   r   r   c                C   s   t j|  |jjj| _|j}|j	|j
}t|d|d| _t| jj| jj|t|dd| _t| jdd}t| jj|d| _| jj| _|   d S )Nr3   )r   r   r   lm_headr   logit_scaleg      ?)scale)r-   Moduler'   r(   r)   r*   r   r   model_configget_num_layersparallel_configr   r3   r   r+   r,   r   rx   ra   r   logits_processorr0   num_moe_layersset_moe_parameters)r   r   r   r   target_layer_numry   r    r    r!   r'      s*   
z#EagleDeepseekV3ForCausalLM.__init__r;   r   c                 C   s   | j |S r<   )r3   r>   r=   r    r    r!   r>      s   z*EagleDeepseekV3ForCausalLM.embed_input_idsNr?   r@   inputs_embedsc                 C   s*   |d urt t| j d| |||S )Nz( does not support multimodal inputs yet.)NotImplementedErrortypero   r3   )r   r;   r?   r@   r   r    r    r!   rJ      s
   z"EagleDeepseekV3ForCausalLM.forwardc                 C   s   |  | j|}|S r<   )r   rx   )r   r@   logitsr    r    r!   compute_logits   s   z)EagleDeepseekV3ForCausalLM.compute_logitsrK   c                    s,    fdd}t  d d}|t|| d S )Nc                    s*   | \}}d|vrd| }t  | ||fS )Nrx   zmodel.)r   )rF   rg   rh   r   r    r!   	transform   s
   
z:EagleDeepseekV3ForCausalLM.load_weights.<locals>.transform)skip_prefixes)r   rn   map)r   rK   r   loaderr    r   r!   rn      s   z'EagleDeepseekV3ForCausalLM.load_weightsr<   )ro   rp   rq   r   rr   r'   rC   rt   r>   ru   rJ   r   r   rn   r    r    r    r!   rw      s*    

 rw   )collections.abcr   rC   torch.nnr-   vllm.compilation.decoratorsr   vllm.configr   $vllm.model_executor.layers.fused_moer   $vllm.model_executor.layers.layernormr   +vllm.model_executor.layers.logits_processorr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r	   -vllm.model_executor.model_loader.weight_utilsr
   r   &vllm.model_executor.models.deepseek_v2r   r   utilsr   r   r   r{   r   rw   r    r    r    r!   <module>   s      