o
    پiV5                     @   s:  d dl Z d dlmZmZmZ d dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZmZmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZ d dlmZmZ d dlmZ d dlm Z  d dl!m"Z"m#Z#m$Z$ d dl%m&Z& d dl'm(Z( dZ)e *e+Z,G dd dej-Z.G dd dej-Z/G dd de#Z0e0Z1dS )    N)IterableOptionalTuple)nn)PretrainedConfig)$get_tensor_model_parallel_world_size)'get_global_expert_distribution_recorder)LayerCommunicatorLayerScatterModesenable_moe_dense_fully_dp)get_attention_tp_rankis_dp_attention_enabled)RMSNorm)LogitsProcessor)QuantizationConfig)ParallelLMHeadVocabParallelEmbedding)ForwardBatch)default_weight_loader)MiMoV2AttentionMiMoV2FlashForCausalLM	MiMoV2MLP)get_global_server_args)
add_prefixc                       sr   e Zd Z			ddededee deddf
 fd	d
Zde	j
de	j
dedee	j
 dee	j
e	j
f f
ddZ  ZS )MiMoV2MTPLayerr   N configlayer_idquant_configprefixreturnc              	      s  t    || _|j| _t|dd}t|dd }t|dd}td#i d| jd|jd|jd	|jd
t|dd dt|dd d|j	d|j
dt|ddd|dt|d|d|d|d|dt|dddtd|| _d| _d}d}	t rd\}
}nd\}
}t| j|j|j|td||
|d| _t|j|jd| _t|j|jd| _tj|d | j||	d!| _t| j| j| jd"| _d S )$N
rope_thetai@B rope_scalingmax_position_embeddingsi   hidden_size	num_headsnum_kv_headshead_dim
v_head_dimswa_v_head_dimv_scaleattention_value_scalesliding_window_sizeattention_biasattention_sink_biasadd_swa_attention_sink_biasFr   swa_rope_thetar   partial_rotary_factorg      ?r   	self_attnT)r      )NNmlp)r$   intermediate_size
hidden_actr   r   tp_ranktp_sizeepsr3   )r   
num_layersis_layer_sparseis_previous_layer_sparseis_next_layer_sparse)layer_scatter_modesinput_layernormpost_attention_layernorm )super__init__r   r$   getattrr   swa_num_attention_headsswa_num_key_value_headsswa_head_dimr,   r-   r   r2   r<   r   r   r5   r6   r4   r   layernorm_epsilonr@   rA   r
   init_newr?   r	   layer_communicator)selfr   r   r   r   r!   r"   r#   r=   r>   mlp_tp_rankmlp_tp_size	__class__rB   Y/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/mimo_v2_flash_nextn.pyrD   8   s   
	


	zMiMoV2MTPLayer.__init__	positionshidden_statesforward_batchresidualc                 C   s   | j |||\}}|jd dkr| j|||d}| j |||\}}t   | |}W d    n1 s8w   Y  | j |||\}}||fS )Nr   )rR   rS   rT   )	rK   prepare_attnshaper2   prepare_mlpr   disable_this_regionr4   postprocess_layer)rL   rR   rS   rT   rU   rB   rB   rQ   forward{   s&   zMiMoV2MTPLayer.forward)r   Nr   )__name__
__module____qualname__MiMoV2FlashConfigintr   r   strrD   torchTensorr   r   r[   __classcell__rB   rB   rO   rQ   r   7   s4    Cr   c                       sb   e Zd Z		ddedee deddf fddZ	dd	ej	d
ej	de
dej	dej	f
ddZ  ZS )MiMoV2ModelNextNNr   r   r   r   r    c                    s   t    |j| _t|j|jt td|d| _t|j|j	d| _
t|j|j	d| _tjd|j |jdd| _t|d|td|d	| _t|j|j	d| _d S )
Nembed_tokens)use_attn_tp_groupr   r9      F)biasr   decoder)r   r   )rC   rD   
vocab_sizer   r$   r   r   rf   r   rI   enormhnormr   Lineareh_projr   	mtp_blockfinal_layernorm)rL   r   r   r   rO   rB   rQ   rD      s$   
zMiMoV2ModelNextN.__init__	input_idsrR   rT   input_embedsc           	      C   s   |d u r
|  |}n|}|jd dkr'| tj| || |jjfdd}| j	|||d d\}}d }|j
 s[|jrF|d u rB|n|| }|d urV| ||\}}||fS | |}||fS )Nr   )dim)rR   rS   rT   rU   )rf   rW   ro   rb   catrl   rm   	spec_inforS   rp   forward_modeis_idle return_hidden_states_before_normrq   )	rL   rr   rR   rT   rs   rS   rU   hidden_states_before_norm_rB   rB   rQ   r[      s8   	


zMiMoV2ModelNextN.forward)Nr   N)r\   r]   r^   r   r   r   ra   rD   rb   rc   r   r[   rd   rB   rB   rO   rQ   re      s0    #re   c                   @   s   e Zd Z			ddedee dee deddf
dd	Ze	
 d
e	jde	jdede	jfddZddeeee	jf  fddZdedefddZdd Zdd ZdS )	MiMoV2MTPNr   r   r   draft_model_idxr   r    c                 C   sf   t j|  || _t | _|| _t||td|d| _	t
|j|j|td|t jd| _t|| _d S )Nmodel)r   lm_head)r   r   rg   )r   ModulerD   r   r   r8   r   re   r   r   r   rk   r$   r   enable_dp_lm_headr   r   logits_processor)rL   r   r   r   r   rB   rB   rQ   rD      s   zMiMoV2MTP.__init__rr   rR   rT   c                 C   s(   |  |||\}}| j||| j||dS )N)r{   )r   r   r   )rL   rr   rR   rT   rS   r{   rB   rB   rQ   r[      s   zMiMoV2MTP.forwardFweightsc                 C   s  g d}t |  }|D ]\}}d|v sd|v rqd|v s!d|v r"q| jjr+d|v r+q|dr5||vr5q| |}|D ].\}}}	||vrFq<d|vrL n|||}|d	r\||vr\q<|| }
|
j}||
||	  n`|d	ru||vruqd|vrd
|vrd|vrd|vrd|vrd|vrd|vrq||	 v r|| }
d|v rt
 |
  }|
j||||
    qt|
dt}||
| qtd| d qd S )N))qkv_projq_projq)r   k_projk)r   v_projv)gate_up_proj	gate_projr   )r   up_projr3   zrotary_emb.inv_freq	projectorzrotary_emb.cos_cachedzrotary_emb.sin_cachedzlm_head.weightzmodel.vision_towerrp   z.biasrf   r   rl   rm   ro   rq   r.   weight_loaderz
Parameter z not found in params_dict)dictnamed_parametersr   tie_word_embeddings
startswith map_model_name_to_mtp_param_namereplaceendswithr   keysr   numeldatacopy_rE   r   loggerwarning)rL   r   is_nextnstacked_params_mappingparams_dictnameloaded_weight
param_nameweight_nameshard_idparamr   startrB   rB   rQ   load_weights  s\   	
zMiMoV2MTP.load_weightsr   c                 C   sz   dd l }d|v r|dd}g d}d}|||}|d ur;|D ]}||v r2|| d}|  S q || d}|S )Nr   pre_mlp_layernormrA   )rl   rm   ro   rq   zmodel.mtp.layers.(\d+).zmodel.zmodel.mtp_block.)rer   matchgroup)rL   r   r   name_without_prefixpatternr   sub_namerB   rB   rQ   r   O  s   z*MiMoV2MTP.map_model_name_to_mtp_param_namec                 C   s   | j jj| jjfS r}   )r   rf   weightr   )rL   rB   rB   rQ   get_embed_and_heade  s   zMiMoV2MTP.get_embed_and_headc                 C   s8   | j j`| j`|| j j_|| j_tj  tj  d S r}   )r   rf   r   r   rb   cudaempty_cachesynchronize)rL   embedheadrB   rB   rQ   set_embed_and_headh  s   

zMiMoV2MTP.set_embed_and_head)NNr   )F)r\   r]   r^   r   r   r   r`   ra   rD   rb   no_gradrc   r   r[   r   r   r   r   r   r   rB   rB   rB   rQ   r~      s:    
Ar~   )2loggingtypingr   r   r   rb   r   transformersr   sglang.srt.distributedr   #sglang.srt.eplb.expert_distributionr   sglang.srt.layers.communicatorr	   r
   r   sglang.srt.layers.dp_attentionr   r   sglang.srt.layers.layernormr   "sglang.srt.layers.logits_processorr   *sglang.srt.layers.quantization.base_configr   *sglang.srt.layers.vocab_parallel_embeddingr   r   ,sglang.srt.model_executor.forward_batch_infor   $sglang.srt.model_loader.weight_utilsr   sglang.srt.models.mimo_v2_flashr   r   r   sglang.srt.server_argsr   sglang.srt.utilsr   r_   	getLoggerr\   r   r   r   re   r~   
EntryClassrB   rB   rB   rQ   <module>   s2   
cH 