o
    پii                     @   s  d Z ddlZddlmZmZmZ ddlZddlmZ ddlm	Z	 ddl
mZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZ ddlmZ ddlmZmZ ddlmZ ddl m!Z! e"e#Z$G dd dej%Z&G dd deZ'e'gZ(dS )z5Inference-only GLM-4.5, GLM-4.6 Speculative Decoding.    N)IterableOptionalTuple)nn)PretrainedConfig)$get_tensor_model_parallel_world_size)'get_global_expert_distribution_recorder)is_dp_attention_enabled)RMSNorm)LogitsProcessor)QuantizationConfig)ParallelLMHeadVocabParallelEmbedding)ForwardBatch)Glm4MoeDecoderLayerGlm4MoeForCausalLM)get_global_server_args)
add_prefixc                       sb   e Zd Z		ddedee deddf fddZ	dd	ej	d
ej	de
dej	dej	f
ddZ  ZS )Glm4MoeModelNextNN configquant_configprefixreturnc                    s   t    |d ur| dkrtd d }|j| _t|j|jt t	d|d| _
t|j|jd| _t|j|jd| _tjd|j |jdd| _t|d	|d
t	d|d| _t | _t|j|jd| j_d S )Nmodelopt_fp4zYOverriding Glm4MoeForCausalLMNextN quant config for modelopt_fp4 GLM-4.5 / GLM-4.6 model.embed_tokens)use_attn_tp_groupr   )eps   F)biasr   Tdecoder)r   is_nextnr   )super__init__get_nameloggerwarning
vocab_sizer   hidden_sizer	   r   r   r
   rms_norm_epsenormhnormr   Lineareh_projr   r    Moduleshared_headnormselfr   r   r   	__class__ T/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/glm4_moe_nextn.pyr#   +   s2   

zGlm4MoeModelNextN.__init__	input_ids	positionsforward_batchinput_embedsc                 C   s   |d u r
|  |}n|}|jd dkr'| tj| || |jjfdd}d }t	 
  | ||||\}}W d    n1 sCw   Y  |j sb|d ur\| j||\}}|S | j|}|S )Nr   )dim)r   shaper-   torchcatr*   r+   	spec_infohidden_statesr   disable_this_regionr    forward_modeis_idler/   r0   )r2   r7   r8   r9   r:   rA   residual_r5   r5   r6   forwardQ   s0   


zGlm4MoeModelNextN.forwardNr   N)__name__
__module____qualname__r   r   r   strr#   r>   Tensorr   rG   __classcell__r5   r5   r3   r6   r   *   s0    +r   c                	       s|   e Zd Z		ddedee deddfddZe	 d	ej
d
ej
dedej
fddZdeeeej
f  f fddZ  ZS )Glm4MoeForCausalLMNextNNr   r   r   r   r   c                 C   s~   t j|  || _t | _|| _t||td|d| _	t
|j|j|td|t jd| _t|| _t jr:d| _d S d| _d S )Nmodel)r   zmodel.shared_head.head)r   r   r   r      )r   r.   r#   r   r   tp_sizer   r   r   rQ   r   r'   r(   r   enable_dp_lm_headlm_headr   logits_processordisable_shared_experts_fusionnum_fused_shared_expertsr1   r5   r5   r6   r#   x   s&   

z Glm4MoeForCausalLMNextN.__init__r7   r8   r9   c                 C   s    |  |||}| ||| j|S rI   )rQ   rV   rU   )r2   r7   r8   r9   rA   r5   r5   r6   rG      s   
zGlm4MoeForCausalLMNextN.forwardweightsc                    s   t  j|dd d S )NT)r!   )r"   load_weights)r2   rY   r3   r5   r6   rZ      s   z$Glm4MoeForCausalLMNextN.load_weightsrH   )rJ   rK   rL   r   r   r   rM   r#   r>   no_gradrN   r   rG   r   r   rZ   rO   r5   r5   r3   r6   rP   w   s.    
(rP   ))__doc__loggingtypingr   r   r   r>   r   transformersr   sglang.srt.distributedr   #sglang.srt.eplb.expert_distributionr   sglang.srt.layers.dp_attentionr	   sglang.srt.layers.layernormr
   "sglang.srt.layers.logits_processorr   *sglang.srt.layers.quantization.base_configr   *sglang.srt.layers.vocab_parallel_embeddingr   r   ,sglang.srt.model_executor.forward_batch_infor   sglang.srt.models.glm4_moer   r   sglang.srt.server_argsr   sglang.srt.utilsr   	getLoggerrJ   r%   r.   r   rP   
EntryClassr5   r5   r5   r6   <module>   s*   
M
+