o
    پiy                     @   s
  d Z ddlZddlmZmZmZ ddlZddlmZ ddlm	Z	 ddl
mZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZ ddlmZ ddlmZ ddlmZ ddlm Z  ddl!m"Z" e#e$Z%G dd dej&Z'G dd deZ(e(gZ)dS )z,Inference-only GLM-OCR Speculative Decoding.    N)IterableOptionalTuple)nn)PretrainedConfig)$get_tensor_model_parallel_world_size)'get_global_expert_distribution_recorder)is_dp_attention_enabled)RMSNorm)LogitsProcessor)QuantizationConfig)ParallelLMHeadVocabParallelEmbedding)ForwardBatch)Glm4DecoderLayer)GlmOcrForConditionalGeneration)get_global_server_args)
add_prefixc                       sb   e Zd Z		ddedee deddf fddZ	dd	ej	d
ej	de
dej	dej	f
ddZ  ZS )GlmOcrModelNextNN configquant_configprefixreturnc                    s   t    |d ur| dkrtd d }|j| _t|j|jt  t	d|d| _
t|j|jd| _t|j|jd| _tjd|j |jdd| _t|d	|t	d
|d| _t | _t|j|jd| j_d S )Nmodelopt_fp4zHOverriding GlmOcrModelNextN quant config for modelopt_fp4 GLM-OCR model.embed_tokens)	enable_tpr   )eps   F)biasr   decoder)r   r   )super__init__get_nameloggerwarning
vocab_sizer   hidden_sizer	   r   r   r
   rms_norm_epsenormhnormr   Lineareh_projr   r    Moduleshared_headnormselfr   r   r   	__class__ S/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/glm_ocr_nextn.pyr"   ,   s0   

zGlmOcrModelNextN.__init__	input_ids	positionsforward_batchinput_embedsc                 C   s   |d u r
|  |}n|}|jd dkr'| tj| || |jjfdd}d }t	 
  | ||||\}}W d    n1 sCw   Y  |j sb|d ur\| j||\}}|S | j|}|S )Nr   )dim)r   shaper,   torchcatr)   r*   	spec_infohidden_statesr   disable_this_regionr    forward_modeis_idler.   r/   )r1   r6   r7   r8   r9   r@   residual_r4   r4   r5   forwardQ   s0   


zGlmOcrModelNextN.forwardNr   N)__name__
__module____qualname__r   r   r   strr"   r=   Tensorr   rF   __classcell__r4   r4   r2   r5   r   +   s0    *r   c                	       s|   e Zd Z		ddedee deddfddZe	 d	ej
d
ej
dedej
fddZdeeeej
f  f fddZ  ZS )#GlmOcrForConditionalGenerationNextNNr   r   r   r   r   c                 C   s~   t j|  || _t | _|| _t||td|d| _	t
|j|j|td|t jd| _t|| _t jr:d| _d S d| _d S )Nmodel)r   zmodel.shared_head.head)r   r   use_attn_tp_groupr      )r   r-   r"   r   r   tp_sizer   r   r   rP   r   r&   r'   r   enable_dp_lm_headlm_headr   logits_processordisable_shared_experts_fusionnum_fused_shared_expertsr0   r4   r4   r5   r"   x   s&   

z,GlmOcrForConditionalGenerationNextN.__init__r6   r7   r8   c                 C   s    |  |||}| ||| j|S rH   )rP   rV   rU   )r1   r6   r7   r8   r@   r4   r4   r5   rF      s   
z+GlmOcrForConditionalGenerationNextN.forwardweightsc                    s   t  j|dd d S )NT)is_nextn)r!   load_weights)r1   rY   r2   r4   r5   r[      s   z0GlmOcrForConditionalGenerationNextN.load_weightsrG   )rI   rJ   rK   r   r   r   rL   r"   r=   no_gradrM   r   rF   r   r   r[   rN   r4   r4   r2   r5   rO   w   s.    
(rO   )*__doc__loggingtypingr   r   r   r=   r   transformersr   sglang.srt.distributedr   #sglang.srt.eplb.expert_distributionr   sglang.srt.layers.dp_attentionr	   sglang.srt.layers.layernormr
   "sglang.srt.layers.logits_processorr   *sglang.srt.layers.quantization.base_configr   *sglang.srt.layers.vocab_parallel_embeddingr   r   ,sglang.srt.model_executor.forward_batch_infor   sglang.srt.models.glm4r   sglang.srt.models.glm_ocrr   sglang.srt.server_argsr   sglang.srt.utilsr   	getLoggerrI   r$   r-   r   rO   
EntryClassr4   r4   r4   r5   <module>   s,   
L
+