o
    پi>                     @   s|  d Z ddlmZmZmZmZmZ ddlZddlm	  m
Z ddlm	Z	 ddlmZ ddlmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlm Z m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z( ddl)m*Z+ ddl,m-Z-m.Z. G dd de	j/Z0G dd de	j/Z1G dd de	j/Z2G dd de	j/Z3G dd de	j/Z4G dd de4Z5e5e4gZ6dS ) zKInference-only Ernie4.5 model compatible with baidu/ERNIE-4.5-*-PT weights.    )IterableListOptionalTupleUnionN)nn)Ernie4_5_MoeConfig)$get_tensor_model_parallel_world_size tensor_model_parallel_all_reduce)enable_moe_dense_fully_dp)RMSNorm)LogitsProcessor)get_moe_impl_class)FusedMoE)TopK)QuantizationConfig)ParallelLMHeadVocabParallelEmbedding)ForwardBatch)default_weight_loader)DeepseekV2MLP)LlamaAttention)
add_prefixmake_layersc                       s.   e Zd Z	ddef fddZdd Z  ZS )MoEGate prefixc                    s@   t    tt|j|jf| _ttd|jf| _	d S )N   )
super__init__r   	Parametertorchemptymoe_num_expertshidden_sizeweighte_score_correction_bias)selfconfigr   	__class__ L/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/ernie4.pyr   1   s   

zMoEGate.__init__c                 C   s   t || jd }|S N)Flinearr%   )r'   hidden_stateslogitsr+   r+   r,   forward>   s   zMoEGate.forward)r   )__name__
__module____qualname__strr   r2   __classcell__r+   r+   r)   r,   r   0   s    r   c                	       sd   e Zd Z		ddededee def fddZd	e	j
d
e	j
fddZd	e	j
d
e	j
fddZ  ZS )	Ernie4MoeNr   r(   layer_idquant_configr   c              
      s   t    || _t | _t|dd| _|jdkr!td|j dt	|t
d|d| _t|j|dd	| jjd
| _t||j|j|j|j| j|t
d|d| _| jdkrk|j|j }t|j||j|d	t
d|d| _d S d S )Nmoe_num_shared_expertsr   siluzUnsupported activation: z!. Only silu is supported for now.gate)r(   r   TF)top_kr9   renormalizeuse_grouped_topkcorrection_biasexperts)num_expertsr>   r$   intermediate_sizer9   r:   r   shared_experts)r$   rD   
hidden_actr:   reduce_resultsr   )r   r   r9   r	   tp_sizegetattrr;   rF   
ValueErrorr   r   r=   r   moe_kr&   topkr   r#   r$   moe_intermediate_sizerB   	Ernie4MLPrE   )r'   r(   r9   r:   r   rD   r)   r+   r,   r   D   sJ   




zErnie4Moe.__init__r0   returnc                 C   s
   |  |S r-   )forward_normal)r'   r0   r+   r+   r,   r2   x   s   
zErnie4Moe.forwardc                 C   sb   | j dkr
| |nd }| |}| ||}| j||d}|d ur&|| }| jdkr/t|}|S )Nr   )r0   topk_outputr   )r;   rE   r=   rL   rB   rH   r
   )r'   r0   shared_outputrouter_logitsrQ   final_hidden_statesr+   r+   r,   rP   {   s   



zErnie4Moe.forward_normalNr   )r3   r4   r5   r   intr   r   r6   r   r!   Tensorr2   rP   r7   r+   r+   r)   r,   r8   C   s    4r8   c                       sr   e Zd ZdZ			ddedee dedef fd	d
Z	de
jde
jdedee
j dee
je
jf f
ddZ  ZS )Ernie4DecoderLayerzA single transformer layer.

    Transformer layer takes input with size [s, b, h] and returns an
    output of the same size.
    Nr   Fr9   r:   r   is_mtpc                    s,  t    t|dd}t|dd }t|dd}t||j|j|j|||||j|td||j	d| _
t|d|j}	t|d	|jd
 }
|sd|	|  krL|
krdn n||	 |j dkrdt|||td|d| _nt rld\}}nd\}}t|j|j|j|td|||d| _t|j|jd| _t|j|jd| _d S )N
rope_thetai'  rope_scalingrope_is_neox_styleF	self_attn)r(   r$   	num_headsnum_kv_headsr9   rZ   r[   r\   max_position_embeddingsr:   r   biasmoe_layer_start_indexmoe_layer_end_indexr   r   mlpr(   r9   r:   r   )r   r   )NN)r$   rD   rF   r:   r   tp_rankrH   eps)r   r   rI   Ernie4Attentionr$   num_attention_headsnum_key_value_headsr`   r   use_biasr]   num_hidden_layersmoe_layer_intervalr8   rd   r   rN   rD   rF   r   rms_norm_epsinput_layernormpost_attention_layernorm)r'   r(   r9   r:   r   rY   rZ   r[   r\   rb   rc   mlp_tp_rankmlp_tp_sizer)   r+   r,   r      sb   



zErnie4DecoderLayer.__init__	positionsr0   forward_batchresidualrO   c                 C   sZ   |d u r|}|  |}n|  ||\}}| j|||d}| ||\}}| |}||fS )N)rt   r0   ru   )rp   r]   rq   rd   )r'   rt   r0   ru   rv   r+   r+   r,   r2      s   
zErnie4DecoderLayer.forward)Nr   F)r3   r4   r5   __doc__rV   r   r   r6   boolr   r!   rW   r   r   r2   r7   r+   r+   r)   r,   rX      s2    
@rX   c                       s   e Zd Z		ddedee deddf fddZe	 	dd	ej
d
ej
dedej
deej
eej
eej
 f f f
ddZ  ZS )Ernie4ModelNr   r(   r:   r   rO   c                    s^   t     | _t j jtd|d| _t j	 fdddd| _
t j jd| _d S )Nembed_tokensr:   r   c                    s   t  | |dS )Nre   )rX   )idxr   r(   r:   r+   r,   <lambda>   s    z&Ernie4Model.__init__.<locals>.<lambda>zmodel.layers)r   rg   )r   r   r(   r   
vocab_sizer$   r   rz   r   rm   layersr   ro   normr'   r(   r:   r   r)   r}   r,   r      s   
zErnie4Model.__init__	input_idsrt   ru   input_embedsc           	      C   sN   |d u r
|  |}n|}d }| jD ]}|||||\}}q| ||\}}|S r-   )rz   r   r   )	r'   r   rt   ru   r   r0   rv   layer_r+   r+   r,   r2     s   

zErnie4Model.forwardrU   r-   )r3   r4   r5   r   r   r   r6   r   r!   no_gradrW   r   r   r   r   r2   r7   r+   r+   r)   r,   ry      s2    ry   c                	       s   e Zd Zg dddgdZg dZ		dded	ee d
ef fddZ	e
 de
jde
jdede
jfddZdeeee
jf  fddZdd Z  ZS )Ernie4_5_ForCausalLM)q_projk_projv_proj	gate_projup_proj)qkv_projgate_up_proj))	.qkv_projz.q_projq)r   z.k_projk)r   z.v_projv).gate_up_projz
.gate_projr   )r   z.up_projr   Nr   r(   r:   r   c                    s`   t    || _|| _t||td|| _|jr| jj| _	nt
|j|j|dd| _	t|| _d S )Nmodellm_headr{   )r   r   r(   r:   ry   r   r   tie_word_embeddingsrz   r   r   r   r$   r   logits_processorr   r)   r+   r,   r   /  s   
zErnie4_5_ForCausalLM.__init__r   rt   ru   rO   c                 C   s    |  |||}| ||| j|S r-   )r   r   r   )r'   r   rt   ru   r0   r+   r+   r,   r2   D  s   
zErnie4_5_ForCausalLM.forwardweightsc           
      C   s   t |  }|D ]M\}}| jjrd|v rq| jD ]\}}}||vr"q|||}|| }|j}	|	|||  n|| v rM|| }t|dt	}	|	|| qt
d| dqd S )Nlm_head.weightweight_loaderParameter '' not found in model.)dictnamed_parametersr(   r   stacked_params_mappingreplacer   keysrI   r   KeyError)
r'   r   params_dictnameloaded_weight
param_nameweight_nameshard_idparamr   r+   r+   r,   load_weightsP  s*   z!Ernie4_5_ForCausalLM.load_weightsc                 C   s   | j jj| jjfS r-   )r   rz   r%   r   )r'   r+   r+   r,   get_embed_and_headg  s   z'Ernie4_5_ForCausalLM.get_embed_and_headrU   )r3   r4   r5   packed_modules_mappingr   r   r   r   r6   r   r!   r   rW   r   r2   r   r   r   r   r7   r+   r+   r)   r,   r   !  s4    r   c                   @   s(   e Zd Zdeeeejf  fddZdS )Ernie4_5_MoeForCausalLMr   c              	   C   sd  t jddd| jjd}t|  }|D ]\}}| jjr d|v r q|dr&qd|v r0|dd	}| j	D ]'\}}}||vr=q3d
|v rF||vrFq3|||}|| }	|	j
}
|
|	||  nT|D ]3}|\}}}}||vrjq]|||}|| v r|| }	|	j
}
|
|	||||d ntd| d n|| v r|| }	t|	dt}
|
|	| qtd| dqd S )Nr   	down_projr   )ckpt_gate_proj_nameckpt_down_proj_nameckpt_up_proj_namerC   r   z
model.mtp_z#moe_statics.e_score_correction_biasmoe_staticsr=   zmlp.experts.)r   	expert_idr   z'(replaced) not found in model.r   r   )r   make_expert_params_mappingr(   r#   r   r   r   
startswithr   r   r   r   r   rI   r   )r'   r   expert_params_mappingr   r   r   r   r   r   r   r   mappingr   r+   r+   r,   r   l  sh   

z$Ernie4_5_MoeForCausalLM.load_weightsN)	r3   r4   r5   r   r   r6   r!   rW   r   r+   r+   r+   r,   r   k  s     r   )7rw   typingr   r   r   r   r   r!   torch.nn.functionalr   
functionalr.   ;transformers.models.ernie4_5_moe.configuration_ernie4_5_moer   sglang.srt.distributedr	   r
   sglang.srt.layers.communicatorr   sglang.srt.layers.layernormr   "sglang.srt.layers.logits_processorr   "sglang.srt.layers.moe.ep_moe.layerr   ,sglang.srt.layers.moe.fused_moe_triton.layerr   sglang.srt.layers.moe.topkr   *sglang.srt.layers.quantization.base_configr   *sglang.srt.layers.vocab_parallel_embeddingr   r   ,sglang.srt.model_executor.forward_batch_infor   $sglang.srt.model_loader.weight_utilsr   sglang.srt.models.deepseek_v2r   rN   sglang.srt.models.llamar   ri   sglang.srt.utilsr   r   Moduler   r8   rX   ry   r   r   
EntryClassr+   r+   r+   r,   <module>   s6   Ka2J@