o
    پiS                     @   s   d dl mZmZmZ d dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d d	lmZmZ d d
lmZ d dlmZ d dlmZ G dd dejZG dd dejZeZdS )    )IterableOptionalTupleN)nn)PretrainedConfig)$get_tensor_model_parallel_world_size)RMSNorm)LogitsProcessor)QuantizationConfig)ParallelLMHeadVocabParallelEmbedding)ForwardBatch)default_weight_loader)Qwen2DecoderLayerc                       s`   e Zd Z	ddededee ddf fddZ	ddej	d	ej	d
e
dej	dej	f
ddZ  ZS )MiMoMultiTokenPredictorLayerNconfigprefixquant_configreturnc                    s~   t    t|j|j| _t|j|jd| _t|j|jd| _	t
j|jd |jdd| _t|||d| _t|j|jd| _d S )N)eps   F)bias)r   r   r   )super__init__r   
vocab_sizehidden_sizeembed_tokensr   rms_norm_epstoken_layernormhidden_layernormr   Linear
input_projr   	mtp_blockfinal_layernorm)selfr   r   r   	__class__ N/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/mimo_mtp.pyr      s   
z%MiMoMultiTokenPredictorLayer.__init__	input_ids	positionsforward_batchinput_embedsc                 C   sx   |d u r
|  |}n|}d||dk< | tj| |jj| |fdd}| j|||d d\}}|| }| 	|}|S )Nr   )dim)r*   hidden_statesr+   residual)
r   r!   torchcatr   	spec_infor/   r   r"   r#   )r$   r)   r*   r+   r,   r/   r0   r'   r'   r(   forward.   s*   


z$MiMoMultiTokenPredictorLayer.forwardN)__name__
__module____qualname__r   strr   r
   r   r1   Tensorr   r4   __classcell__r'   r'   r%   r(   r      s.    r   c                	   @   s   e Zd Z		ddedee deddfddZe	 d	ej
d
ej
dedej
fddZdeeeej
f  fddZdedefddZdd Zdd ZdS )MiMoMTPN r   r   r   r   c                 C   sP   t j|  || _t | _|| _t|||| _t	|j
|j|d| _t|| _d S )N)r   )r   Moduler   r   r   tp_sizer   r   modelr   r   r   lm_headr	   logits_processor)r$   r   r   r   r'   r'   r(   r   S   s   zMiMoMTP.__init__r)   r*   r+   c                 C   s    |  |||}| ||| j|S r5   )r@   rB   rA   )r$   r)   r*   r+   r/   r'   r'   r(   r4   j   s   
zMiMoMTP.forwardweightsc                 C   sH  g d}t |  }|D ]\}}d|v sd|v rqd|v s!d|v r"q| jjr+d|v r+q|dr5||vr5q| |}|D ].\}}}||vrFq<d|vrL nU|||}|d	r\||vr\q<|| }	|	j}
|
|	||  n6|d	ru||vruqd|vrd
|vrd|vrd|vrd|vrd|vrd|vrq|| }	t	|	dt
}
|
|	| qd S )N))qkv_projq_projq)rD   k_projk)rD   v_projv)gate_up_proj	gate_projr   )rK   up_proj   zrotary_emb.inv_freq	projectorzrotary_emb.cos_cachedzrotary_emb.sin_cachedzlm_head.weightzmodel.vision_towerr"   z.biasr   rA   r   r   r!   r#   weight_loader)dictnamed_parametersr   tie_word_embeddings
startswith map_model_name_to_mtp_param_namereplaceendswithrP   getattrr   )r$   rC   stacked_params_mappingparams_dictnameloaded_weight
param_nameweight_nameshard_idparamrP   r'   r'   r(   load_weightsv   sN   	

zMiMoMTP.load_weightsr[   c                 C   sf   dd l }g d}d}|||}|d ur1|D ]}||v r(|| d}|  S q|| d}|S )Nr   )r   r   r!   r#   zmodel.mtp_layers.(\d+).zmodel.zmodel.mtp_block.)rematchrV   group)r$   r[   rb   name_without_prefixpatternrd   sub_namer'   r'   r(   rU      s   z(MiMoMTP.map_model_name_to_mtp_param_namec                 C   s   | j jj| jjfS r5   )r@   r   weightrA   )r$   r'   r'   r(   get_embed_and_head   s   zMiMoMTP.get_embed_and_headc                 C   s8   | j j`| j`|| j j_|| j_tj  tj  d S r5   )r@   r   rh   rA   r1   cudaempty_cachesynchronize)r$   embedheadr'   r'   r(   set_embed_and_head   s   

zMiMoMTP.set_embed_and_head)Nr=   )r6   r7   r8   r   r   r
   r9   r   r1   no_gradr:   r   r4   r   r   ra   rU   ri   ro   r'   r'   r'   r(   r<   R   s4    
6r<   )typingr   r   r   r1   r   transformersr   sglang.srt.distributedr   sglang.srt.layers.layernormr   "sglang.srt.layers.logits_processorr	   *sglang.srt.layers.quantization.base_configr
   *sglang.srt.layers.vocab_parallel_embeddingr   r   ,sglang.srt.model_executor.forward_batch_infor   $sglang.srt.model_loader.weight_utilsr   sglang.srt.models.qwen2r   r>   r   r<   
EntryClassr'   r'   r'   r(   <module>   s   <y