o
    پid=                  
   @   s  d Z ddlZddlZddlmZ ddlmZmZmZm	Z	 ddl
Z
ddlmZ ddlmZmZ ddlmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZmZ ddlmZ ddl m!Z! e"e#Z$ee!Z%G dd deZ&de'de(de
j)de'de*f
ddZ+G dd deZ,e,Z-dS )zBInference-only Qwen3-VL model compatible with HuggingFace weights.    N)	lru_cache)IterableOptionalTupleUnion)Qwen3VLMoeConfigQwen3VLMoeTextConfig)ModelConfigForExpertLocation)FusedMoE)QuantizationConfig)ForwardBatchPPProxyTensors)default_weight_loader)Qwen3MoeDecoderLayerQwen3MoeModel)Qwen3VLForConditionalGeneration)get_processorc                       s   e Zd Zddeddedee def fddZd	e	j
fd
dZdedeej d	eej fddZ			ddejdejdedejdee deej d	eejef fddZ  ZS )Qwen3MoeLLMModelN )quant_configprefixdecoder_layer_typeconfigr   r   c                   s*   t  j||||d |j| _td| _d S )N)r   r   r   r      )super__init__hidden_sizerange deepstack_embed_to_decoder_layer)selfr   r   r   r   	__class__ R/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/qwen3_vl_moe.pyr   )   s   zQwen3MoeLLMModel.__init__returnc                 C   s   | j S N)embed_tokens)r   r"   r"   r#   get_input_embeddings=   s   z%Qwen3MoeLLMModel.get_input_embeddings	layer_idxinput_deepstack_embedsc                 C   s:   |du s	|| j vrdS | j| }|dd||| j f S )zLGet deepstack embeddings for a given layer index, or None if not applicable.N)r   r   )r   r(   r)   sepr"   r"   r#   get_deepstack_embeds@   s
   

z%Qwen3MoeLLMModel.get_deepstack_embeds	input_ids	positionsforward_batchinput_embedspp_proxy_tensorsc                 C   s0  | j jr|d u r| |}n|}d }n|d usJ |d }|d }g }	t| j| j| j D ].\}
}|
| j7 }
|
| jv rI|	|d urF|| n| | 	|
d |}||||||d\}}q.| 	| jd |}| j j
sqt||dS |jd dkr|d u r| |}n
| j|||d\}}t|	dkr|S ||	fS )Nhidden_statesresidual   )post_residual_addition)r1   r2   r   )pp_groupis_first_rankr&   	enumeratelayersstart_layer	end_layerlayers_to_captureappendr+   is_last_rankr   shapenormlen)r   r,   r-   r.   r/   r0   r)   r1   r2   aux_hidden_statesr(   layerdeepstack_embedslast_deepstack_r"   r"   r#   forwardL   sZ   	

		

zQwen3MoeLLMModel.forward)NNN)__name__
__module____qualname__r   r   r   r   strr   nn	Embeddingr'   inttorchTensorr+   r   r   r   rF   __classcell__r"   r"   r    r#   r   (   sJ    
r   nameparams_dictloaded_weightshard_idnum_expertsc           	      C   s8   ||  }|j }t|D ]}|| }|||| || qdS )NT)weight_loaderr   )	rQ   rR   rS   rT   rU   paramrV   	expert_idcurr_expert_weightr"   r"   r#   load_fused_expert_weights   s   rZ   c                       sz   e Zd Zddefdedee def fddZe	
dZd	ed
efddZdeeeejf  fddZedd Z  ZS )"Qwen3VLMoeForConditionalGenerationNr   r   r   r   c                    s   t  |||| d S r%   )r   r   )r   r   r   r   language_model_clsr    r"   r#   r      s   z+Qwen3VLMoeForConditionalGeneration.__init__z6^model\.layers\.(\d+)\.self_attn\.(?:qkv_proj|o_proj)$module_namer$   c                 C   s   t | j|S r%   )bool_lora_pattern_moematch)r   r]   r"   r"   r#   should_apply_lora   s   z4Qwen3VLMoeForConditionalGeneration.should_apply_loraweightsc              	   C   s  g d}t jddd| jjd}d}d}dd	g}| jj}t| d
s't|  | _| j}|D ]\}	}
|	dd}	|D ]C\}}}d|	v sFd|	v rJd}|}||	vrOq9d|	v rTq9d|	v rYq9|	||}	|		|ri|	|vriq9|	|vrnq9||	 }|j
}|||
|  nd}|D ]l}|\}}}}||	vrqd|	v s| jjrqd}|	||}|r|
dd}
d|	v r|
jddd}
t|||
d d| t|||
d d| n#t|||
|| n|	|r||vrq|| }|j
}|||
|||d |}	 nP|rq,d|	v r|	dd}	|	dd}	|		|r|	|vrq,| jjs| jjr|	|vrq,|	| v r5||	 }t|dt}|||
 q,td |	 d! q,d S )"N))	.qkv_projz.q_projq)rc   z.k_projk)rc   z.v_projv)gate_up_projup_projr3   )rg   	gate_projr   ri   	down_projrh   )ckpt_gate_proj_nameckpt_down_proj_nameckpt_up_proj_namerU   )
z.bias_biasz.k_scale_k_scalez.v_scale_v_scalez.weight_scale_weight_scalez.input_scale_input_scaleF)zexperts.w13_weightexperts.gate_up_projr   w1)zexperts.w2_weightexperts.down_projr   w2_cached_params_dictzmodel.language_model.zmodel.rs   ru   Tvisualzmlp.experts   )dimr   rt   r3   w3)rT   rX   z	attn.qkv.zattn.qkv_proj.zmodel.visual.zvisual.rV   z
Parameter z not found in params_dict)r
   make_expert_params_mappingr   rU   hasattrdictnamed_parametersrw   replaceendswithrV   encoder_only	transposechunkrZ   language_onlykeysgetattrr   loggerwarning)r   rb   stacked_params_mappingexpert_params_mappingignore_suffixesis_fused_expertfused_expert_params_mappingrU   rR   rQ   rS   
param_nameweight_namerT   rW   rV   is_expert_weightmappingrX   name_mappedr"   r"   r#   load_weights   s   	



z/Qwen3VLMoeForConditionalGeneration.load_weightsc                 C   s   t |jj|jjd dS )N)
num_layersnum_logical_experts
num_groups)r	   text_confignum_hidden_layersrU   )clsr   r"   r"   r#   $get_model_config_for_expert_locationp  s
   zGQwen3VLMoeForConditionalGeneration.get_model_config_for_expert_location)rG   rH   rI   r   r   r   r   rJ   r   recompiler_   r^   ra   r   r   rN   rO   r   classmethodr   rP   r"   r"   r    r#   r[      s&    
 3r[   ).__doc__loggingr   	functoolsr   typingr   r   r   r   rN   torch.nnrK   sglang.srt.configs.qwen3_vlr   r   sglang.srt.eplb.expert_locationr	   ,sglang.srt.layers.moe.fused_moe_triton.layerr
   *sglang.srt.layers.quantization.base_configr   ,sglang.srt.model_executor.forward_batch_infor   r   $sglang.srt.model_loader.weight_utilsr   sglang.srt.models.qwen3_moer   r   sglang.srt.models.qwen3_vlr   &sglang.srt.utils.hf_transformers_utilsr   	getLoggerrG   r   cached_get_processorr   rJ   r   rO   rM   rZ   r[   
EntryClassr"   r"   r"   r#   <module>   sB   
m
 N