o
    پi5                     @   s   d Z ddlmZmZmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZ ddlmZmZ ddlmZ G dd dejZG dd dejZegZdS )z@Ernie4.5 MTP model compatible with baidu/ERNIE-4.5-*-PT weights.    )IterableOptionalTupleN)nn)Ernie4_5_MoeConfig)RMSNorm)LogitsProcessor)QuantizationConfig)ParallelLMHeadVocabParallelEmbedding)ForwardBatch)default_weight_loader)Ernie4_5_ForCausalLMErnie4DecoderLayer)
add_prefixc                       sd   e Zd Z	ddedededee ddf
 fddZ	dd	e	j
d
e	j
dede	j
de	j
f
ddZ  ZS )Ernie4ModelMTPNconfiglayer_idprefixquant_configreturnc                    s   t    t|j|j|td|d| _t|j|jd| _	t|j|jd| _
tj|jd |j|jd| _t|||td|dd| _d S )	Nembed_tokensr   r   )eps   )bias	mtp_blockT)r   r   r   r   is_mtp)super__init__r   
vocab_sizehidden_sizer   r   r   rms_norm_epsmtp_emb_normmtp_hidden_normr   Linearuse_biasmtp_linear_projr   r   )selfr   r   r   r   	__class__ R/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/ernie4_eagle.pyr   '   s&   
zErnie4ModelMTP.__init__	input_ids	positionsforward_batchinput_embedsc                 C   sr   |d u r
|  |}n|}d||dk< | tj| || |jjfdd}d }| j||||d\}}|| }|S )Nr   )dim)r.   hidden_statesr/   residual)	r   r'   torchcatr#   r$   	spec_infor3   r   )r(   r-   r.   r/   r0   r3   r4   r+   r+   r,   forwardC   s*   	
zErnie4ModelMTP.forwardN)__name__
__module____qualname__r   intstrr   r	   r   r5   Tensorr   r8   __classcell__r+   r+   r)   r,   r   &   s2    !r   c                   @   s   e Zd Z			ddedee dededdf
d	d
Ze	
 de	jde	jdede	jfddZdeeee	jf  fddZdd Zdd ZdS )Ernie4_5_MoeForCausalLMMTPN r   r   r   r   mtp_layer_idr   c                 C   sh   t j|  || _|| _t|| j|td|d| _|jr"| jj	| _
nt|j|j|dd| _
t|| _d S )Nmodel)r   r   r   r   lm_headr   )r   Moduler   r   rC   r   r   rD   tie_word_embeddingsr   rE   r
   r    r!   r   logits_processor)r(   r   r   r   rC   r+   r+   r,   r   f   s$   z#Ernie4_5_MoeForCausalLMMTP.__init__r-   r.   r/   c                 C   s    |  |||}| ||| j|S r9   )rD   rH   rE   )r(   r-   r.   r/   r3   r+   r+   r,   r8      s   
z"Ernie4_5_MoeForCausalLMMTP.forwardweightsc                 C   s  d}d| j  d| j  d| j  d| j  g}t|  }|D ]]\}}|D ]
}||v r0d} nq&q |d| j  dd}tjD ]\}}	}
|	|vrJq@||	|}|| }|j}||||
  n|| v ru|| }t|dt	}||| q t
d	| d
q |st
d| j  dd S )NFz
mtp_block.zmtp_emb_norm.zmtp_hidden_norm.zmtp_linear_proj.T.weight_loaderzParameter 'z' not found in MTP model.zMTP layers 'mtp_*.z.*' not found in weights.)rC   dictnamed_parametersreplacer   stacked_params_mappingrK   keysgetattrr   KeyError)r(   rI   mtp_layer_foundmtp_weight_patternsparams_dictnameloaded_weightlayer_pattern
param_nameweight_nameshard_idparamrK   r+   r+   r,   load_weights   sP   




z'Ernie4_5_MoeForCausalLMMTP.load_weightsc                 C   s   | j jj| jjfS r9   )rD   r   weightrE   )r(   r+   r+   r,   get_embed_and_head   s   z-Ernie4_5_MoeForCausalLMMTP.get_embed_and_headc                 C   sL   | j j`|| j j_| jjr| j j| _n| j`|| j_tj  tj	  d S r9   )
rD   r   r^   r   rG   rE   r5   cudaempty_cachesynchronize)r(   embedheadr+   r+   r,   set_embed_and_head   s   

z-Ernie4_5_MoeForCausalLMMTP.set_embed_and_head)NrB   r   )r:   r;   r<   r   r   r	   r>   r=   r   r5   no_gradr?   r   r8   r   r   r]   r_   re   r+   r+   r+   r,   rA   e   s8    
-rA   )__doc__typingr   r   r   r5   r   ;transformers.models.ernie4_5_moe.configuration_ernie4_5_moer   sglang.srt.layers.layernormr   "sglang.srt.layers.logits_processorr   *sglang.srt.layers.quantization.base_configr	   *sglang.srt.layers.vocab_parallel_embeddingr
   r   ,sglang.srt.model_executor.forward_batch_infor   $sglang.srt.model_loader.weight_utilsr   sglang.srt.models.ernie4r   r   sglang.srt.utilsr   rF   r   rA   
EntryClassr+   r+   r+   r,   <module>   s    ?
f