o
    پi2                     @   s   d Z ddlZddlmZmZmZ ddlZddlmZ ddlm	Z	 ddl
mZmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZ ddlmZ ddlmZ eeZ G dd deZ!e!Z"dS )z2Inference-only ExaoneMoE MTP Speculative Decoding.    N)IterableOptionalTuple)nn)PretrainedConfig)get_pp_group$get_tensor_model_parallel_world_size)RMSNorm)LogitsProcessor)QuantizationConfig)ParallelLMHead)ForwardBatch)ExaoneMoEForCausalLMExaoneMoEModel)get_global_server_args)
add_prefixc                       s   e Zd Z		ddedee deddfddZe	 	dd	ej
d
ej
dedeej
 fddZ	ddeeeej
f  def fddZ  ZS )ExaoneMoEForCausalLMMTPN configquant_configprefixreturnc                 C   s   t j|  || _d|_t | _|| _t | _	t j
d|j |jdd| _t|j|jd| _t|j|jd| _t||td|d| _t|j|j|td|t jd	| _t|| _d S )
N      F)bias)epsmodel)r   lm_head)r   r   use_attn_tp_group)r   Module__init__r   num_hidden_layersr   tp_sizer   r   pp_groupLinearhidden_sizefcr	   rms_norm_epspre_fc_norm_embeddingpre_fc_norm_hiddenr   r   r   r   
vocab_sizer   enable_dp_lm_headr   r
   logits_processor)selfr   r   r    r.   T/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/exaone_moe_mtp.pyr    (   s,   z ExaoneMoEForCausalLMMTP.__init__	input_ids	positionsforward_batchinput_embedsc                 K   st   |d u r
| j |}|jj}|j s| |}| |}| t	j
||fdd}|  ||||}| ||| j|S )N)dim)r   embed_tokens	spec_infohidden_statesforward_modeis_idler(   r)   r&   torchcatr,   r   )r-   r0   r1   r2   r3   kwargsr8   r.   r.   r/   forwardF   s    	



zExaoneMoEForCausalLMMTP.forwardFweightsis_mtpc                    s   t  j|dd d S )NT)r@   )superload_weights)r-   r?   r@   	__class__r.   r/   rB   d   s   z$ExaoneMoEForCausalLMMTP.load_weights)Nr   )N)F)__name__
__module____qualname__r   r   r   strr    r;   no_gradTensorr   r>   r   r   boolrB   __classcell__r.   r.   rC   r/   r   '   s:    
r   )#__doc__loggingtypingr   r   r   r;   r   transformersr   sglang.srt.distributedr   r   sglang.srt.layers.layernormr	   "sglang.srt.layers.logits_processorr
   *sglang.srt.layers.quantization.base_configr   *sglang.srt.layers.vocab_parallel_embeddingr   ,sglang.srt.model_executor.forward_batch_infor   sglang.srt.models.exaone_moer   r   sglang.srt.server_argsr   sglang.srt.utilsr   	getLoggerrE   loggerr   
EntryClassr.   r.   r.   r/   <module>   s$   
C