o
    
۾ic8                     @   s8  d Z ddlZddlmZ ddlZddlmZ ddlmZ ddlm	Z	 ddl
mZmZ ddlmZ dd	lmZ dd
lmZ ddlmZmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ  ddlm!Z" ddlm#Z$ ddl%m&Z&m'Z'm(Z(m)Z)m*Z* G dd dej+Z,e	G dd dej+Z-G dd dej+eeeZ.dS )zFInference-only EagleMiniCPM model compatible with HuggingFace weights.    N)Iterable)nn)PretrainedConfig)support_torch_compile)CacheConfig
VllmConfig)RMSNorm)LogitsProcessor)QuantizationConfig)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)IntermediateTensors   )SupportsEagleSupportsLoRA
SupportsPP)MiniCPMAttention)
MiniCPMMLP)
MiniCPMMoE)AutoWeightsLoaderis_pp_missing_parameter'make_empty_intermediate_tensors_factorymaybe_prefixprocess_eagle_weightc                       s   e Zd Z			ddededB dedB deddf
 fdd	Zd
d Zdd Z	de
jde
jde
jdB dee
je
jf fddZ  ZS )EagleMiniCPMDecoderLayerN configcache_configquant_configprefixreturnc                    sL   t    || _|| _|| _|j| _t|dd| _|| _| 	  | 
  d S )Nmax_position_embeddingsi    )super__init__r   r   r   hidden_sizegetattrr"   r    _init_attn_block_init_ffn_block)selfr   r   r   r    	__class__ \/home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/minicpm_eagle.pyr$   <   s   
z!EagleMiniCPMDecoderLayer.__init__c              
   C   sP   t | jj| jjd| _t| j| jj| jj| jj| j	| j
| j| j dd| _d S )Nepsz
.self_attn)r%   	num_headsnum_kv_headsrope_parametersr"   r   r   r    )r   r   r%   rms_norm_epsinput_layernormEagleMiniCPMAttentionnum_attention_headsnum_key_value_headsr2   r"   r   r   r    	self_attnr)   r,   r,   r-   r'   M   s   
z)EagleMiniCPMDecoderLayer._init_attn_blockc                 C   s   t | jj| jjd| _t| jdd| _| jdkr/t| j| jj| jj	t| jdd| j
d| _d S t| jj| jj| jj| jj| j dd| _d S )	Nr.   num_expertsr   hidden_act_paramg        )r%   intermediate_size
hidden_actr;   r   z.mlp)r:   top_kr%   r<   r    )r   r   r%   r3   post_attention_layernormr&   r:   EagleMiniCPMMLPr<   r=   r   mlpEagleMiniCPMMoEnum_experts_per_tokr    r9   r,   r,   r-   r(   \   s&   

z(EagleMiniCPMDecoderLayer._init_ffn_block	positionshidden_statesresidualc                 C   sx   |}|  |}| j||d}||| jjt| jj   }|}| |}| |}||| jjt| jj   }|d fS )N)rD   rE   )	r4   r8   r   scale_depthmathsqrtmup_denominatorr?   rA   )r)   rD   rE   rF   r,   r,   r-   forwardr   s    


z EagleMiniCPMDecoderLayer.forward)NNr   )__name__
__module____qualname__r   r   r
   strr$   r'   r(   torchTensortuplerK   __classcell__r,   r,   r*   r-   r   ;   s4    r   c                
       s   e Zd Zddddededef fddZded	ed
edB de	dB def
ddZ
dejdejfddZdejdejdejdejeB fddZdeeeejf  dee fddZ  ZS )EagleMiniCPMModelr   r   r    start_layervllm_configr    rV   c                   s   t    |jjj}|j}|j}|| _|| _|| _|j| _t	j
j| jjd | jjdd| _t|j|jd| _t|j|jd| _t| j|j| _t| jdd| _| ||||| t|j|jd| _tddg| jj| _d S )	N   F)biasr.   r:   r   rE   rF   )r#   r$   speculative_configdraft_model_config	hf_configr   r   r   
vocab_sizerP   r   Linearr%   fcr   r3   input_norm1input_norm2r   embed_tokensr&   r:   _init_layersnormr   make_empty_intermediate_tensors)r)   rW   r    rV   r   r   r   r*   r,   r-   r$      s.   


zEagleMiniCPMModel.__init__r   r   Nr   c                    s.   t  fddt| jjD | _d S )Nc              
      s(   g | ]}t   d |  qS )z.eagle_layers.)r   ).0ir   r   r    r   rV   r,   r-   
<listcomp>   s    z2EagleMiniCPMModel._init_layers.<locals>.<listcomp>)r   
ModuleListranger   num_hidden_layerseagle_layers)r)   r    r   r   r   rV   r,   rh   r-   rc      s
   

zEagleMiniCPMModel._init_layers	input_idsr!   c                 C   s   |  |}|| jj S N)rb   r   	scale_emb)r)   rn   	embeddingr,   r,   r-   embed_input_ids   s   
z!EagleMiniCPMModel.embed_input_idsrD   rE   c                 C   s^   |  |}| |}| |}| tj||fdd}d }| jD ]
}||||\}}q ||fS )N)dim)rr   r`   ra   r_   rP   catrm   )r)   rn   rD   rE   input_embedsrF   layerr,   r,   r-   rK      s   




zEagleMiniCPMModel.forwardweightsc                 C   sX  g d}dd t | jD }t|  }t }|D ]\}}d|v r"qd|v s*d|v r+q|D ].\}}	}
|	|vr7q-||	|}|drG||vrGq-t|| rMq-|| }|j}||||
  nH|D ]&\}}	}|	|vrhq^||	|}t|| rtq^|| }|j}||||	|d  n|dr||vrqt|| rq|| }t	|d	t
}||| || q|S )
N))qkv_projq_projq)ry   k_projk)ry   v_projv)gate_up_proj	gate_projr   )r   up_projr   c              	   S   s:   g | ]}d D ]}|dv rdndd| d| d|fqqS ))w1w2w3)r   r   wsw2szexperts..z.weightr,   )rf   	expert_idweight_namer,   r,   r-   ri      s    z2EagleMiniCPMModel.load_weights.<locals>.<listcomp>zrotary_emb.inv_freqzrotary_emb.cos_cachedzrotary_emb.sin_cachedz.bias)r   weight_loader)rk   r:   dictnamed_parameterssetreplaceendswithr   r   r&   r   add)r)   rx   stacked_params_mappingexpert_params_mappingparams_dictloaded_paramsnameloaded_weight
param_namer   shard_idparamr   r   r,   r,   r-   load_weights   s\   




zEagleMiniCPMModel.load_weights)rL   rM   rN   r   rO   intr$   r   r   r
   rc   rP   rQ   rr   r   rK   r   rR   r   r   rS   r,   r,   r*   r-   rT      s>    

,rT   c                
       s   e Zd Zg dddgdZdddZdd	d
edef fddZdddd
ededefddZ	de
jde
jfddZde
jde
jde
jdee
je
jf fddZde
jde
jdB fddZdeeee
jf  dee fdd Z  ZS )!EagleMiniCPMForCausalLM)rz   r|   r~   r   r   )ry   r   input_embeddingsoutput_embeddings)rb   lm_headr   )r    rW   r    c                   s   t    |jjj}|j}|j}|| _|| _|| _	|| _|| _|j
|j}| j|t|d|d| _t|j|j|t|dd| _|jrL| j| jj| _| j	j| j	j | _t|j| _| jj| _d S )NmodelrW   r    rV   r   )r   r    )r#   r$   rZ   r[   r\   r   r   r    rW   r   model_configget_num_layersparallel_config_init_modelr   r   r   r]   r%   r   tie_word_embeddingstie_weightsrb   dim_model_basescale_widthr	   logits_processorre   )r)   rW   r    r   r   r   target_layer_numr*   r,   r-   r$   6  s:   

z EagleMiniCPMForCausalLM.__init__r   rU   rV   c                C   s   t |||dS )Nr   )rT   )r)   rW   r    rV   r,   r,   r-   r   \  s   z#EagleMiniCPMForCausalLM._init_modelrn   r!   c                 C   s   | j |S ro   )r   rr   )r)   rn   r,   r,   r-   rr   c  s   z'EagleMiniCPMForCausalLM.embed_input_idsrD   rE   c                 C   s.   |  |||\}}|| j }|| j }||fS ro   )r   r   )r)   rn   rD   rE   hidden_states2r,   r,   r-   rK   f  s   

zEagleMiniCPMForCausalLM.forwardNc                 C   s   |  | j|}|S ro   )r   r   )r)   rE   logitsr,   r,   r-   compute_logitsq  s   z&EagleMiniCPMForCausalLM.compute_logitsrx   c                    s6    fdd}t   jjrdgnd d}|t||S )Nc                    s   | \}}t  | ||fS ro   )r   )inputsr   r   r9   r,   r-   	transformy  s   
z7EagleMiniCPMForCausalLM.load_weights.<locals>.transformzlm_head.)skip_prefixes)r   r   r   r   map)r)   rx   r   loaderr,   r9   r-   r   x  s   z$EagleMiniCPMForCausalLM.load_weights)rL   rM   rN   packed_modules_mappingembedding_modulesr   rO   r$   r   r   rP   rQ   rr   rR   rK   r   r   r   r   rS   r,   r,   r*   r-   r   #  sD    '


,r   )/__doc__rH   collections.abcr   rP   r   transformersr   vllm.compilation.decoratorsr   vllm.configr   r   $vllm.model_executor.layers.layernormr   +vllm.model_executor.layers.logits_processorr	   'vllm.model_executor.layers.quantizationr
   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   vllm.sequencer   
interfacesr   r   r   minicpmr   r5   r   r@   r   rB   utilsr   r   r   r   r   Moduler   rT   r   r,   r,   r,   r-   <module>   s0   	S 