o
    پi5                     @   s  d dl Z d dlmZ d dlmZ d dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZ d dlmZ d dlmZmZ d dlmZ e eZ de	de!dee" fddZ#G dd dej$Z%G dd dej$Z&G dd deZ'e'gZ(dS )    N)Iterable)Optional)PretrainedConfig)$get_tensor_model_parallel_world_size)GemmaRMSNorm)LogitsProcessor)QuantizationConfig)ParallelLMHeadVocabParallelEmbedding)ForwardBatch)default_weight_loader)Step3p5DecoderLayerStep3p5ForCausalLM)
add_prefixconfigweight_namereturnc                 C   sV   t | dr)t| dddkr)| j}t| jD ]}|d||  dr(||   S qdS )zReturn MTP/nextn layer index if this weight belongs to spec layers.

    Step3p5 MTP/nextn checkpoints append extra layers after the main decoder:
      model.layers.[num_hidden_layers ... num_hidden_layers + num_nextn_predict_layers)
    num_nextn_predict_layersr   model.layers..N)hasattrgetattrnum_hidden_layersranger   
startswith)r   r   basei r   Q/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/step3p5_mtp.py#get_spec_layer_idx_from_weight_name   s   
r   c                       s:   e Zd Z	d	d	 fddZdejdejfddZ  ZS )

SharedHeadNr   c                    s:   t    t|j|j| _t|j|j|d| _| j| _	d S )N)quant_config)
super__init__r   hidden_sizerms_norm_epsnormr	   
vocab_sizeheadlm_head)selfr   r!   	__class__r   r   r#   -   s   

zSharedHead.__init__hidden_statesc                 C   
   |  |S N)r&   )r*   r-   r   r   r   forward9      
zSharedHead.forwardr/   )r   N)__name__
__module____qualname__r#   torchTensorr0   __classcell__r   r   r+   r   r    +   s    r    c                       sx   e Zd Z		ddedee deddf fddZ	dd	ej	d
ej	de
dej	dej	f
ddZd	ej	dej	fddZ  ZS )Step3p5AMultiTokenPredictorN r   r!   prefixr   c                    s   t    || _t|j|j| _|j| _|j	| _
d}t|j|j| _t|j|j| _tj|jd |jdd| _t||d| _t||| dd| _| jj| _d S )N-      F)bias)r   r!   z
.mtp_block)r   layer_idr:   )r"   r#   r   r
   r'   r$   embed_tokensr   mtp_start_layer_idxr   num_mtp_layersr   r%   enormhnormnnLineareh_projr    shared_headr   	mtp_blockr(   r)   )r*   r   r!   r:   r>   r+   r   r   r#   >   s"   
z$Step3p5AMultiTokenPredictor.__init__	input_ids	positionsforward_batchinput_embedsc           	      C   s   |d u r
|  |}n|}|jd dkr'| tj| || |jjfdd}| j	|||d d\}}d }|j
 sZ|d u r?|n|| }|d urT| j||\}}||fS | j|}||fS )Nr   )dim)rJ   r-   rK   residual)r?   shaperF   r5   catrB   rC   	spec_infor-   rH   forward_modeis_idlerG   r&   )	r*   rI   rJ   rK   rL   r-   rO   hidden_states_before_norm_r   r   r   r0   X   s6   	

z#Step3p5AMultiTokenPredictor.forwardc                 C   r.   r/   )r?   r*   rI   r   r   r   embed_input_ids   r1   z+Step3p5AMultiTokenPredictor.embed_input_ids)Nr9   r/   )r2   r3   r4   r   r   r   strr#   r5   r6   r   r0   rX   r7   r   r   r+   r   r8   =   s2    
)r8   c                   @   s   e Zd Z			ddedee dee deddf
dd	Zd
e	j
de	j
fddZd
e	j
de	j
dede	j
fddZdd Zdd Zdeeee	j
f  dee fddZdee dedefddZdS )
Step3p5MTPNr9   r   r!   draft_model_idxr:   r   c                 C   sT   t j|  || _t | _|| _|| _t||t	d|d| _
t|| _| j
j| _d S )Nmodel)r   r!   r:   )rD   Moduler#   r   r   tp_sizer!   r[   r8   r   r\   r   logits_processorr)   )r*   r   r!   r[   r:   r   r   r   r#      s   
zStep3p5MTP.__init__rI   c                 C   s   | j |S r/   )r\   rX   rW   r   r   r   rX      s   zStep3p5MTP.embed_input_idsrJ   rK   c                 C   s,   |  |||\}}| j||| j jj||dS )N)rU   )r\   r_   rG   r(   )r*   rI   rJ   rK   r-   rU   r   r   r   r0      s   zStep3p5MTP.forwardc                 C   s   | j jj| j jjjfS r/   )r\   r?   weightrG   r(   )r*   r   r   r   get_embed_and_head   s   zStep3p5MTP.get_embed_and_headc                 C   s   d S r/   r   )r*   embedr(   r   r   r   set_embed_and_head   s   zStep3p5MTP.set_embed_and_headweightsc              
   C   sH  g d}g d}t |  }t }|D ]\}}d|v rqt| j|}|d ur0|| jj| j kr0qd|vr9|d u r9q| ||}|D ]:\}	}
}|
|vrKqAd|v rT||vrTqAd|v s\d|v r]qA||
|	}|	drm||vrmqA|| }|j
}||||  n|D ]E}|\}	}
}|
|vrq~||
|	}|	ds|	d	r||vrq~|| }|j
}t|jd
 D ]}|| }||||||d q||  n;|	dr||vsd|v rqd|v r|dd}d|v rt| jdr| jjd
ksJ d}|| }t|dt}||| || qt| }||kr"t|| }|d
 }td| d|S )N))qkv_projq_projq)re   k_projk)re   v_projv)gate_up_proj	gate_projr   )rl   up_proj   )).moe.experts.w13_weightz.moe.gate_proj.weightw1)rp   z.moe.up_proj.weightw3)z.moe.experts.w2_weightz.moe.down_proj.weightw2zrotary_emb.inv_freqr?   zmlp.experts.expertsmoez.bias_biasr   )shard_id	expert_idtok_embeddingsrG   zshared_head.outputzshared_head.headr   zmodel.embed_tokens.weightweight_loaderzSome parameters like zE are not in the checkpoint and will falsely use random initialization)dictnamed_parameterssetr   r   r   r[   _rewrite_spec_layer_namereplaceendswithrz   r   rP   addr   r   r   r   keyslistRuntimeError)r*   rd   stacked_params_mappingexpert_params_mappingparams_dictloaded_paramsnameloaded_weight
spec_layer
param_namer   rw   paramrz   mappingrx   loaded_weight_expertparams_need_to_loadmissing_paramsparam_name_exampler   r   r   load_weights   s   	






zStep3p5MTP.load_weightsr   r   c                 C   s   |du r|S d| d}| |r|ddd}g d}d}|D ]
}||v r*d} nq |s;|d| dd| d	}d| d}| |rM||d
d}|S )z
        Rewrite the weight name to match the format of the original model.
        Add .mtp_block for modules in transformer layer block for spec layer
        Nr   z.transformer.r   ro   )r?   rB   rC   rF   rG   FTz.mtp_block.zmodel.)r   r   )r*   r   r   transformer_prefixspec_layer_weight_namesspec_layer_weightr   layers_prefixr   r   r   r~   (  s(   

z#Step3p5MTP._rewrite_spec_layer_name)NNr9   )r2   r3   r4   r   r   r   intrY   r#   r5   r6   rX   r   r0   ra   rc   r   tupler}   r   r~   r   r   r   r   rZ      s:    

$lrZ   ))loggingcollections.abcr   typingr   r5   torch.nnrD   transformersr   sglang.srt.distributedr   sglang.srt.layers.layernormr   "sglang.srt.layers.logits_processorr   *sglang.srt.layers.quantization.base_configr   *sglang.srt.layers.vocab_parallel_embeddingr	   r
   ,sglang.srt.model_executor.forward_batch_infor   $sglang.srt.model_loader.weight_utilsr   sglang.srt.models.step3p5r   r   sglang.srt.utilsr   	getLoggerr2   loggerrY   r   r   r]   r    r8   rZ   
EntryClassr   r   r   r   <module>   s8    

Q 
C