o
    پiP                     @   st  d Z ddlZddlZddlZddlmZmZ ddlmZm	Z	m
Z
mZmZmZ ddlZddlZddlmZ ddlmZ ddlmZ ddlmZmZ ddlmZ dd	lmZ dd
lmZ ddl m!Z! ddl"m#Z#m$Z$m%Z% ddl&m'Z'm(Z( ddl)m*Z* ddl+m,Z,m-Z- ddl.m/Z/ ddl0m1Z1 ddl2m3Z3m4Z4 ddl5m6Z6m7Z7 ddl8m9Z9m:Z: ddl;m<Z<m=Z=m>Z> ddl?m@Z@mAZA ddlBmCZC ddlDmEZE ddlFmGZGmHZHmIZI ddlJmKZK ddlLmMZM ddlNmOZO ddlPmQZQmRZRmSZS ddlTmUZU eVeWZXG dd  d ejYZZG d!d" d"ejYZ[G d#d$ d$ejYZ\G d%d& d&ejYZ]G d'd( d(ejYeGZ^eeUZ_G d)d* d*eEZ`G d+d, d,ejYZaeaZbdS )-zBInference-only Qwen3-VL model compatible with HuggingFace weights.    N)	lru_cachepartial)CallableIterableListOptionalTupleUnion)	rearrange)ACT2FN)Qwen3VLConfigQwen3VLVisionConfig)$get_tensor_model_parallel_world_size)get_pp_group)envs)VisionAttention)get_attention_tp_rankget_attention_tp_sizeis_dp_attention_enabled)ColumnParallelLinearRowParallelLinear)LogitsProcessor)PoolerPoolingType)QuantizationConfig)get_rope)PPMissingLayerget_layer_id)ParallelLMHeadVocabParallelEmbedding)/MultiModalityDataPaddingPatternMultimodalTokensgeneral_mm_embed_routine)ModalityMultimodalDataItemMultimodalInputs)ForwardBatchPPProxyTensors)default_weight_loader)
Qwen3Model)RotaryPosMixinWeightsMapper"compute_cu_seqlens_from_grid_numpy)!run_dp_sharded_mrope_vision_model)ViTCudaGraphRunner)get_global_server_args)
add_prefixget_int_env_varis_npu)get_processorc                       sV   e Zd Z					ddededed	ee d
edef fddZde	j
fddZ  ZS )Qwen3_VisionMLPTsiluN Fin_featureshidden_featuresbiasquant_configprefixuse_data_parallelc              
      s   t    |r	dnt | _|rdnt | _t||||td|| j| jd| _t	||||td|| j| jt
 d| _t| | _d S )N   r   
linear_fc1r8   r9   r:   tp_sizetp_rank
linear_fc2r8   r9   r:   r?   r@   use_dp_attention_reduce)super__init__r   r?   r   r@   r   r/   r=   r   r   rA   r   act)selfr6   r7   r8   
hidden_actr9   r:   r;   	__class__ N/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/qwen3_vl.pyrE   P   s.   

	
zQwen3_VisionMLP.__init__xc                 C   s&   |  |\}}| | |\}}|S N)r=   rA   rF   )rG   rM   x_fc1_
mlp_outputrK   rK   rL   forwardr   s   zQwen3_VisionMLP.forward)Tr4   Nr5   F)__name__
__module____qualname__intboolr   r   strrE   torchTensorrR   __classcell__rK   rK   rI   rL   r3   N   s(    "r3   c                       s4   e Zd Zd fddZdejdejfddZ  ZS )	Qwen3VLVisionPatchEmbedreturnNc                    sX   t    |j| _|j| _|j| _|j| _| j| j| jg}tj| j| j||dd| _	d S )NT)kernel_sizestrider8   )
rD   rE   
patch_sizetemporal_patch_sizein_channelshidden_size	embed_dimnnConv3dproj)rG   configr^   rI   rK   rL   rE   y   s   
z Qwen3VLVisionPatchEmbed.__init__hidden_statesc                 C   sD   | j jj}|d| j| j| j| j}|  |j|dd| j}|S )Ndtype)	rg   weightrl   viewrb   ra   r`   tord   )rG   ri   target_dtyperK   rK   rL   rR      s   
zQwen3VLVisionPatchEmbed.forward)r]   N)rS   rT   rU   rE   rY   rZ   rR   r[   rK   rK   rI   rL   r\   x   s    r\   c                       s   e Zd Z					ddedededeeegejf  d	ee d
e	de
ddf fddZ	ddejdejdejdejdeej dejfddZ  ZS )Qwen3_VisionBlockr4   Nr5   Fdim	num_headsintermediate_dim
norm_layerr9   r:   r;   r]   c	           	         s|   t    |d u rttjdd}||| _||| _t|||ddd|td||t	 d
| _
t|||d|| d|d| _d S )Nư>epsTattn)
rd   rs   projection_sizeuse_qkv_parallel	proj_biasflatten_batchr9   r:   r;   rC   z.mlp)rH   r8   r9   r:   r;   )rD   rE   r   re   	LayerNormnorm1norm2r   r/   r   ry   r3   mlp)	rG   rr   rs   rt   rH   ru   r9   r:   r;   rI   rK   rL   rE      s4   


zQwen3_VisionBlock.__init__rM   
cu_seqlensrotary_pos_emb_cosrotary_pos_emb_sin	output_wsc           
      C   sZ   |  |}t|d}| j|||||d}t|d}||7 }| |}| |}	||	7 }|S )Nzs b ... -> b s ...)r   r   r   r   zb s ... -> s b ...)r   r
   ry   r   r   )
rG   rM   r   r   r   r   ri   ry   r   r   rK   rK   rL   rR      s   




zQwen3_VisionBlock.forward)r4   NNr5   FrN   )rS   rT   rU   rV   r   r   re   Moduler   rX   rW   rE   rY   rZ   rR   r[   rK   rK   rI   rL   rq      sJ    	
-rq   c                       sz   e Zd Z						ddededeeegejf  ded	ed
ee	 de
deddf fddZdejdejfddZ  ZS )Qwen3VLMoeVisionPatchMergerN   Fr5   rr   context_dimru   spatial_merge_sizeuse_postshuffle_normr9   r:   r;   r]   c	           	   
      s   t    ||d  | _|| _|d u rttjdd}||r | jn|| _|r(dnt | _	|r0dnt
 | _t| j| jd|td|| j	| jd| _t | _t| j|d|td	|| j	| jt d
| _d S )Nr   rv   rw   r<   r   Tr=   r>   rA   rB   )rD   rE   rc   r   r   re   r~   normr   r?   r   r@   r   r/   r=   GELUact_fnr   r   rA   )	rG   rr   r   ru   r   r   r9   r:   r;   rI   rK   rL   rE      s<   

	z$Qwen3VLMoeVisionPatchMerger.__init__rM   c                 C   sZ   | j r| |d| j}n
| |d| j}| |\}}| |}| |\}}|S )Nrj   )r   r   rn   rc   r=   r   rA   )rG   rM   
x_parallelrP   outrK   rK   rL   rR     s   
z#Qwen3VLMoeVisionPatchMerger.forward)Nr   FNr5   F)rS   rT   rU   rV   r   r   re   r   rW   r   rX   rE   rY   rZ   rR   r[   rK   rK   rI   rL   r      s6    	
,r   c                       s  e Zd Z				d%dededee ded	ed
df fddZ	e
d
ejfddZe
d
ejfddZdeee  d
eejejf fddZded
ejfddZdd Zdd Zdd Zdejdejd
ejfddZdejdejd
ejfd d!Zd"eeeejf  d
ee fd#d$Z  ZS )&Qwen3VLMoeVisionModelrv   Nr5   Fvision_confignorm_epsr9   r:   r;   r]   c              	      s  t    t _j_j_j_tjd _jj _	t
 j_j_j_jd _j__j_jdtj  _td_jjrhtjjt tdd_nt _ttj|d jj }t ||d dd	d
d_!t" fddt#j$D _%t&jj jtdd_'t" fddt#tjD _(rdnt) _*t+_,d S )N      ?r   r<   )rh   	pos_embedr9   use_attn_tp_groupr:   rw   i    g     @T)	head_size
rotary_dimmax_positionbaseis_neox_stylec                    s8   g | ]}t jjjj td | dqS )zblocks.)rr   rs   rt   rH   ru   r9   r:   r;   )rq   rc   rs   intermediate_sizerH   r/   .0	layer_idxru   r:   r9   rG   r;   r   rK   rL   
<listcomp>I  s    z2Qwen3VLMoeVisionModel.__init__.<locals>.<listcomp>merger)rr   r   ru   r   r9   r:   r;   c                    s6   g | ]}t jjjd  td| dqS )Tzdeepstack_merger_list.)rr   r   r   r   ru   r9   r:   r;   )r   out_hidden_sizerc   r   r/   r   r   rK   rL   r   b  s    )-rD   rE   r   pp_grouprc   rs   num_position_embeddingsrV   num_grid_per_sidenum_gridr.   &enable_precise_embedding_interpolationalign_cornersr`   r   spatial_merge_unitra   r;   deepstack_visual_indexesr   lenr\   patch_embedis_first_rankr   r   r/   r   r   r   re   r~   r   rotary_pos_emb
ModuleListrangedepthblocksr   r   deepstack_merger_listr   r?   r-   cuda_graph_runner)rG   r   r   r9   r:   r;   head_dimrI   r   rL   rE     sv   


zQwen3VLMoeVisionModel.__init__c                 C      | j jjjS rN   )r   rg   rm   rl   rG   rK   rK   rL   rl   v     zQwen3VLMoeVisionModel.dtypec                 C   r   rN   )r   rg   rm   devicer   rK   rK   rL   r   z  r   zQwen3VLMoeVisionModel.devicegrid_thwc                 C   s   g }|D ]\}}}|  ||| j}||dkr|n||d qtj|ddj| jdd}tdd |D }| j	
|\}}	|| d}
|	| d}|
|fS )Nr<   r   rr   Tnon_blockingc                 s   s     | ]\}}}t ||V  qd S rN   )max)r   rP   hwrK   rK   rL   	<genexpr>  s    z4Qwen3VLMoeVisionModel.rot_pos_emb.<locals>.<genexpr>)rot_pos_idsr   appendrepeatrY   catro   r   r   r   get_cos_sinflatten)rG   r   pos_idstr   r   r   max_grid_sizecossincos_combinedsin_combinedrK   rK   rL   rot_pos_emb~  s    z!Qwen3VLMoeVisionModel.rot_pos_embdim_sizec                 C   s^   | j rtjd| jd |tjd}|S tj|tjdd | j|  d }t|d| jd }|S )zw
        Compute continuous interpolation indices for a single dimension.

        Returns continuous indices.
        r   r<   rk   r   )r   nplinspacer   float32arangeclip)rG   r   indicesrK   rK   rL   _get_interpolation_indices  s   z0Qwen3VLMoeVisionModel._get_interpolation_indicesc                 C   sJ  t |t j}t |d d| jd }|| }t |t j}t |d d| jd }|| }| j}	|dddf |	 |  |dddf |	 |  |dddf |	 |  |dddf |	 |  g}
d| dddf d|   d| dddf |  |dddf d|   |dddf |  g}|
|fS )z
        Compute bilinear interpolation indices and weights.

        Returns tuple of (indices, weights), each as 4 numpy arrays for the 4 corner points.
        r<   r   N)r   floorastypeint64r   r   r   )rG   h_idxsw_idxsh_fh_cdhw_fw_cdwsider   weightsrK   rK   rL   _calculate_indices_and_weights  s$   z4Qwen3VLMoeVisionModel._calculate_indices_and_weightsc              	   C   s   g }| j }t||||D ]/\}}}	}
||d}|	| }|
| }||||||ddddddddd}|| qtj|ddS )	z[
        Tile and reorganize position embeddings to align with the token sequence.
        r<   rj   r      r         r   )	r   zipr   rn   permuter   r   rY   r   )rG   patch_pos_embedsgrid_tsgrid_hsgrid_wsresult_parts
merge_sizer   r   r   r   h_mergew_mergerK   rK   rL   _get_position_embedding  s   z-Qwen3VLMoeVisionModel._get_position_embeddingc                 C   s  |   }|dddf  }|dddf  }|dddf  }| jjj}| jjj}dd t||D }t|}	t	j
d|	ft	jd}
t	j
d|	ft	jd}d}t|||D ]:\}}}| |}| |}| ||\}}|||  }tdD ]}|| |
|||f< || ||||f< qy|}qXt|
|}t|j||d	}| |d
}|d|	d
}||d
 jdd}||}| ||||S )a  Interpolate position embeddings for (batch, 3) size input dimensions.

        Performs bilinear interpolation on spatial dimensions (height, width) and replicates
        along temporal dimension. The result is reorganized according to spatial_merge_size.

        Args:
            grid_thw: Tensor of shape [batch_size, 3] with (temporal, height, width) dimensions
                     in patches for each sample.

        Returns:
            Interpolated position embeddings tensor.
        Nr   r<   r   c                 S   s   g | ]\}}|| qS rK   rK   )r   r   r   rK   rK   rL   r     s    zDQwen3VLMoeVisionModel.fast_pos_embed_interpolate.<locals>.<listcomp>r   rk   )rl   r   rj   r   )cpunumpytolistr   rm   r   rl   r   sumr   zerosr   r   r   r   r   rY   
from_numpyro   rn   	unsqueezesplitr   )rG   r   grid_thw_cputemporal_dimsheight_dims
width_dimsr   rl   patches_sizetotal_patchesall_indices_npall_weights_npcurrent_idxr   r   r   r   r   r   r   end_idxi
idx_tensorweight_tensor
pos_embedsr   rK   rK   rL   fast_pos_embed_interpolate  s:   




z0Qwen3VLMoeVisionModel.fast_pos_embed_interpolaterM   c                 C   s&  t j r| ||S |j| j| jd}| |}t|t	r)|}t
j|t
jd}n| }| |}||7 }| |\}}t|}t sM|j| jdd}n|d}|d}g }d}	t| jD ]!\}
}|||||d}|
| jv r| j|	 |}|| |	d7 }	q`| |}t
j|g| dd	}|S )
Nr   rl   rk   Tr   r   r<   r   )r   r   r   r   )r   SGLANG_VIT_ENABLE_CUDA_GRAPHgetforward_with_cuda_graphro   r   rl   r   
isinstancelistrY   tensorint32r   r  r   r+   r1   r   	enumerater   r   r   r   r   r   )rG   rM   r   grid_thw_listr  r   r   r   deepstack_feature_listsnum_deepstack_captured	layer_numblkdeepstack_featureri   rK   rK   rL   rR     sJ   









zQwen3VLMoeVisionModel.forwardc              	   C   s   |j | j| jd}| |}t|tr|}tj|tjd}n|	 }| 
|}||7 }| |\}}t|}t|tjsGtj||jtjd}n	|j |jtjd}| }| jj|d |||d d dS )Nr  rk   )rM   position_embeddingsr   r   r   cu_window_seqlensoutput_indices)ro   r   rl   r   r  r  rY   r  r  r   r  r   r+   rZ   
contiguousr   run)rG   rM   r   r  r  r   r   r   rK   rK   rL   r  D  s.   


z-Qwen3VLMoeVisionModel.forward_with_cuda_graphr   c                 C   s   g d}t | jdd}t }|D ]9\}}|D ]\}}}	||vr!q|||}|| }
|
j}||
||	  n|| }
t|
dt}||
| || q|S )N))	attn.qkv.zattn.q.q)r!  zattn.k.k)r!  zattn.v.vFremove_duplicateweight_loader)dictnamed_parameterssetreplacer'  getattrr'   add)rG   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
param_nameweight_nameshard_idparamr'  rK   rK   rL   load_weightsl  s"   
z"Qwen3VLMoeVisionModel.load_weights)rv   Nr5   F)rS   rT   rU   r   floatr   r   rX   rW   rE   propertyrY   rl   r   r  rV   tuplerZ   r   r   r   r   r  rR   r  r   r*  r7  r[   rK   rK   rI   rL   r     s\    `

7
6
,(r   c                       s   e Zd Zddddedee def fddZd	ed
ee	j
 dee	j
 fddZ			dde	j
de	j
dede	j
dee d
ee	j
 dee	j
ef fddZ  ZS )Qwen3LLMModelNr5   )r9   r:   rh   r9   r:   c                   sR   t  j|||d | jjs| jt|jjksJ d|j| _t	t|jj| _
d S )Nrh   r9   r:   zLstart_layer should be greater than or equal to len(deepstack_visual_indexes))rD   rE   r   r   start_layerr   r   r   rc   r    deepstack_embed_to_decoder_layer)rG   rh   r9   r:   rI   rK   rL   rE     s   

zQwen3LLMModel.__init__r   input_deepstack_embedsr]   c                 C   s:   |du s	|| j vrdS | j| }|dd||| j f S )zLGet deepstack embeddings for a given layer index, or None if not applicable.N)r>  rc   )rG   r   r?  seprK   rK   rL   get_deepstack_embeds  s
   

z"Qwen3LLMModel.get_deepstack_embeds	input_ids	positionsforward_batchinput_embedspp_proxy_tensorsc                 C   s0  | j jr|d u r| |}n|}d }n|d usJ |d }|d }g }	t| j| j| j D ].\}
}|
| j }
|
| jv rI|	|d urF|| n| | 	|
d |}||||||d\}}q.| 	| jd |}| j j
sqt||dS |jd dkr|d u r| |}n
| j|||d\}}t|	dkr|S ||	fS )Nri   residualr<   )post_residual_addition)ri   rG  r   )r   r   embed_tokensr  layersr=  	end_layerlayers_to_capturer   rA  is_last_rankr&   shaper   r   )rG   rB  rC  rD  rE  rF  r?  ri   rG  aux_hidden_statesr   layerdeepstack_embedslast_deepstackrP   rK   rK   rL   rR     sZ   


		

zQwen3LLMModel.forward)NNN)rS   rT   rU   r   r   r   rX   rE   rV   rY   rZ   rA  r%   r&   r	   rR   r[   rK   rK   rI   rL   r;    sF    
r;  c                       s  e Zd ZeddiddddddZdd	efd
edee de	ddf fddZ
dd Zdee defddZdee dejfddZdee dejfddZdd ZedZde	defdd Z	!	d+dejd"ejd#ed$ed%ee f
d&d'Zd(eee	ejf  fd)d*Z   Z!S ),Qwen3VLForConditionalGenerationzattn.qkvzattn.qkv_projzlanguage_model.model.zvisual.zlanguage_model.lm_head.)model.language_model.zmodel.visual.zlm_head.model.)orig_to_new_substrorig_to_new_prefixNr5   rh   r9   r:   r]   c                    sX  t    t | _t j| _t|jd t	|ddt
d|| jd| _|tu r)|| _n|j| _t	|dd| j_t	|dd| j_t|drE|js~|| j|t
d|d	| _| jjry| jjd
kre| jjre| jj| _nt| jj| jj|t jt
d|d| _nt | _nd | _d| jjv | _t| j| _ t!t"j#dd| _$|jj%| _%t&| j%| _'t(j)dt(j*di| _+d S )Nrms_norm_epsrv   zmodel.visual)r9   r   r:   r;   encoder_onlyFlanguage_onlyzmodel.language_modelr<  r<   lm_headr   mrope_sectionT)pooling_type	normalize),rD   rE   r   r   r.   mm_enable_dp_encoderr;   r   r   r,  r/   visualr;  rh   text_configrY  rZ  hasattrmodelrM  
world_sizetie_word_embeddingsrI  r[  r   
vocab_sizerc   enable_dp_lm_headr   rope_scalingis_mrope_enabledr   logits_processorr   r   LASTpoolerr   r   num_deepstack_embeddingsr"   IMAGEVIDEOuse_deepstack)rG   rh   r9   r:   language_model_clsrI   rK   rL   rE     sN   





z(Qwen3VLForConditionalGeneration.__init__c                 C   sn   |j d d| j  dksJ d|j  dd| j  d| jj}|d d d |f }|d d |d f }||fS )Nrj   r<   r   zhidden_state of z should be divisible by ())rN  rm  rh   rc   )rG   	embeddingseparate_indexrE  r?  rK   rK   rL   separate_deepstack_embedsF  s   z9Qwen3VLForConditionalGeneration.separate_deepstack_embedsrB  	mm_inputsc                 C   s   t  }|||S rN   )r    pad_input_tokens)rG   rB  rv  patternrK   rK   rL   pad_input_idsP  s   z-Qwen3VLForConditionalGeneration.pad_input_idsitemsc                 C   s:  t jdd |D dd| jj}t jdd |D dd}| dks)J | | dks5J | tdd}tdd}|dkr\|dkr\| jrUt	| j||
 d	d
S | j||dS |
 }dd |D }t|}dg}	|D ]}
|	|	d |
  qp|	d }|d|ksJ d|d d| dg }d}||k r|}d}d}||k r|| }|dkr|| |krn|dkr|d |krn||7 }|d7 }|d7 }||k s||kr|d }|| }d}|	| }|	| }||| }||| }| jrt	| j||
 d	d
}n| j||d}|| |}||k st j|ddS )Nc                 S      g | ]}|j qS rK   featurer   itemrK   rK   rL   r   V      zEQwen3VLForConditionalGeneration.get_image_feature.<locals>.<listcomp>r   r   c                 S   r{  rK   )image_grid_thwr~  rK   rK   rL   r   Y  r  r   SGLANG_VLM_MAX_PATCHES_PER_VITSGLANG_VLM_MAX_IMAGES_PER_VITrope_3d	rope_typer   c                 S   s   g | ]	}t t|qS rK   )rV   mathprod)r   grK   rK   rL   r   o  s    rj   zpixel_values rows (z) != total patches (rr  r<   )rY   r   typer`  rl   concatrr   r0   r;   r,   r   r   r   size)rG   rz  pixel_valuesr  max_patches_per_callmax_images_per_callr  patches_per_image
num_imagescum_patchespr  all_chunk_embeds	img_startimg_endpatches_in_chunkimages_in_chunknext_patchespatch_start	patch_endpixel_chunk
grid_chunkchunk_embedsrK   rK   rL   get_image_featureT  s   



9z1Qwen3VLForConditionalGeneration.get_image_featurec                 C   s   |D ]}|j | jj|_ qtjdd |D dd| jj}|D ]	}|j d|_ q!tjdd |D dd}|	 dksCJ |	 |	 dksOJ |	 | j
r]t| j|| dd	S | j||d
}|S )Nc                 S   r{  rK   r|  r~  rK   rK   rL   r     r  zEQwen3VLForConditionalGeneration.get_video_feature.<locals>.<listcomp>r   r   r   c                 S   r{  rK   )video_grid_thwr~  rK   rK   rL   r     r  r   r  r  r  )r}  ro   r`  r   rY   r   r  rl   r  rr   r;   r,   r   )rG   rz  r  r  r  video_embedsrK   rK   rL   get_video_feature  s    z1Qwen3VLForConditionalGeneration.get_video_featurec                 C   s   | j jS rN   )rc  rI  r   rK   rK   rL   get_input_embeddings  s   z4Qwen3VLForConditionalGeneration.get_input_embeddingszU^model\.layers\.(\d+)\.(?:self_attn|mlp)\.(?:qkv_proj|o_proj|down_proj|gate_up_proj)$module_namec                 C   s   t | j|S rN   )rW   _lora_patternmatch)rG   r  rK   rK   rL   should_apply_lora  s   z1Qwen3VLForConditionalGeneration.should_apply_loraFrC  rD  get_embeddingrF  c              	   C   s   | j r|j}|j s'| r'| j r'|jdkr|ddks'J d|  t||| j| || j	|d}| j
jrI|sC| ||| j|S | ||S |S )a  Run forward pass for Qwen3-VL.

        Args:
            input_ids: Flattened (concatenated) input_ids corresponding to a
                batch.
            positions: Flattened (concatenated) position ids corresponding to a
                batch.
                **NOTE**: If mrope is enabled (default setting for Qwen2-VL
                opensource models), the shape will be `(3, seq_len)`,
                otherwise it will be `(seq_len,).
                (Use input_metadata.mrope_positions to replace it)
        r   r   r   zMmultimodal section rotary embedding requires (3, seq_len) positions, but got )rB  rD  language_modelmultimodal_modelrC  rp  rF  )ri  mrope_positionsforward_mode	is_decodecontains_image_inputsndimr  r!   rc  rp  r   rM  rj  r[  rl  )rG   rB  rC  rD  r  rF  ri   rK   rK   rL   rR     s>   
z'Qwen3VLForConditionalGeneration.forwardr   c              	   C   s  g d}t | jdd}|D ]\}}d|v rqd|v r!|dd}t|}| jjrDd|v rD| jjrDd	|v rD|d	 }t|d
t	}||| d|v }	|	sf|d urft
| drft
| jdrf|| jjk se|| jjkrfq|D ]:\}
}}||vrrqhd|v rwqh|||
}|dr||vrqh| jjs| jjr||vrqh|| }|j}||||  n>d|v r|dd}z|dr||vrW q|| v r|| }nW qW n ty   t|   w t|d
t	}||| qd S )N))	.qkv_projz.q_projr"  )r  z.k_projr#  )r  z.v_projr$  )gate_up_projup_projr<   )r  	gate_projr   Fr%  zrotary_emb.inv_freqr  rT  rU  zmodel.embed_tokens.weightzlm_head.weightr'  r`  rc  r=  z.biasr!  zattn.qkv_proj.)r(  r)  r+  r   r   rM  rh   re  r,  r'   rb  rc  r=  rK  endswithrY  rZ  r'  keysKeyErrorprint)rG   r   r.  r/  r1  r2  layer_idlm_head_paramr'  	is_visualr3  r4  r5  r6  rK   rK   rL   r7    s   



z,Qwen3VLForConditionalGeneration.load_weights)FN)"rS   rT   rU   r*   hf_to_sglang_mapperr;  r   r   r   rX   rE   ru  r   rV   r$   ry  r#   rY   rZ   r  r  r  recompiler  rW   r  r%   r&   rR   r   r   r7  r[   rK   rK   rI   rL   rS    sZ    A
h
$8rS  )c__doc__loggingr  r  	functoolsr   r   typingr   r   r   r   r   r	   r   r   rY   torch.nnre   einopsr
   transformers.activationsr   sglang.srt.configs.qwen3_vlr   r   sglang.srt.distributedr   %sglang.srt.distributed.parallel_stater   sglang.srt.environr   "sglang.srt.layers.attention.visionr   sglang.srt.layers.dp_attentionr   r   r   sglang.srt.layers.linearr   r   "sglang.srt.layers.logits_processorr   sglang.srt.layers.poolerr   r   *sglang.srt.layers.quantization.base_configr   "sglang.srt.layers.rotary_embeddingr   sglang.srt.layers.utilsr   r   *sglang.srt.layers.vocab_parallel_embeddingr   r   sglang.srt.managers.mm_utilsr    r!   "sglang.srt.managers.schedule_batchr"   r#   r$   ,sglang.srt.model_executor.forward_batch_infor%   r&   $sglang.srt.model_loader.weight_utilsr'   sglang.srt.models.qwen3r(   sglang.srt.models.utilsr)   r*   r+   sglang.srt.multimodal.mm_utilsr,   +sglang.srt.multimodal.vit_cuda_graph_runnerr-   sglang.srt.server_argsr.   sglang.srt.utilsr/   r0   r1   &sglang.srt.utils.hf_transformers_utilsr2   	getLoggerrS   loggerr   r3   r\   rq   r   r   cached_get_processorr;  rS  
EntryClassrK   rK   rK   rL   <module>   sb    
* B:  vj  x