o
    پi5                     @   sL  d Z ddlZddlZddlmZ ddlmZmZmZm	Z	m
Z
 ddlZddlmZ ddlm  mZ ddlmZ ddlmZ ddlmZmZ ddlmZmZ dd	lmZmZ dd
lmZ ddl m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z)m*Z*m+Z+ ddl,m-Z- ddl.m/Z/m0Z0 ddl1m2Z2 ddl3m4Z4m5Z5 ddl6m7Z7 ddl8m9Z9m:Z: ddl;m<Z<m=Z=m>Z> ddl?m@Z@mAZA ddlBmCZC ddlDmEZE ddlFmGZGmHZHmIZI ddlJmKZK ddlLmMZM ddlNmOZO ddlPmQZQmRZRmSZS eR ZTeUeVZWG dd  d ejXZYG d!d" d"ejXZZG d#d$ d$ejXZ[G d%d& d&ejXeGZ\G d'd( d(ejXZ]e]gZ^dS ))zBInference-only Qwen2-VL model compatible with HuggingFace weights.    N)partial)IterableListOptionalTupleType)	rearrange)ACT2FN)Qwen2_5_VLConfigQwen2_5_VLVisionConfig)Qwen2_5_VisionPatchEmbedQwen2_5_VisionRotaryEmbedding)get_tensor_model_parallel_rank$get_tensor_model_parallel_world_size)get_pp_group)envs)
SiluAndMul)VisionAttention)RMSNorm)ColumnParallelLinearMergedColumnParallelLinearRowParallelLinear)LogitsProcessor)PoolerPoolingType)QuantizationConfig)PPMissingLayerget_layer_id)ParallelLMHead)/MultiModalityDataPaddingPatternMultimodalTokensgeneral_mm_embed_routine)ModalityMultimodalDataItemMultimodalInputs)ForwardBatchPPProxyTensors)default_weight_loader)
Qwen2Model)RotaryPosMixinWeightsMapperpermute_inv)!run_dp_sharded_mrope_vision_model)ViTCudaGraphRunner)get_global_server_args)
add_prefixis_cudais_npuc                       s^   e Zd Z						ddededed	ee d
edef fddZde	j
de	j
fddZ  ZS )Qwen2_5_VLMLPNTsilu Fin_featureshidden_featuresbiasquant_configprefixuse_data_parallelc           	   	      s   t    |r	dnt | _|rdnt | _t||gd ||td|| j| jd| _t	||||td|| j| jd| _
|| _| jdkrHt | _d S t| j  d	tjd
tjf fdd}|| _d S )N   r      gate_up_proj)
input_sizeoutput_sizesr6   r7   r8   tp_sizetp_rank	down_projr6   r7   r8   r?   r@   r2   xreturnc                    s   | j ddd\}} || S )Nr;   dim)chunk)rC   gateupbase_act P/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/qwen2_5_vl.py_act_fn   s   z'Qwen2_5_VLMLP.__init__.<locals>._act_fn)super__init__r   r?   r   r@   r   r.   r<   r   rA   
hidden_actr   actr	   torchTensor)	selfr4   r5   r6   rR   r7   r8   r9   rO   	__class__rK   rN   rQ   X   s8   

		


zQwen2_5_VLMLP.__init__rC   rD   c                 C   s*   |  |\}}| |}| |\}}|S N)r<   rS   rA   )rV   rC   gate_up_x_downrM   rM   rN   forward   s   
zQwen2_5_VLMLP.forward)NTr2   Nr3   F)__name__
__module____qualname__intboolr   r   strrQ   rT   rU   r]   __classcell__rM   rM   rW   rN   r1   W   s*    -r1   c                       s   e Zd Z							ddeded	ed
eej dee de	dede
deddf fddZ	ddejdejdejdejfddZ  ZS )Qwen2_5_VisionBlockr2   Nr3   r   ư>FrG   intermediate_dim	num_heads
norm_layerr7   r8   num_dummy_headsrms_norm_epsr9   rD   c                    sj   t    t||	d| _t||	d| _t|||ddd|td|||
d
| _t||||td||
d| _	d S )NepsTattn)
	embed_dimrh   projection_sizeuse_qkv_parallel	proj_biasflatten_batchr7   r8   rj   r9   mlp)rR   r7   r8   r9   )
rP   rQ   r   norm1norm2r   r.   rn   r1   rt   )rV   rG   rg   rh   rR   ri   r7   r8   rj   rk   r9   rW   rM   rN   rQ      s.   
zQwen2_5_VisionBlock.__init__rC   
cu_seqlensposition_embeddingsc                 C   s   |j \}}}|d|}| ||||}	t|	d}	| j|	|||d}
t|
d}
|
d|}| j||d\}}||||}||||}| |}|| }|S )NrE   zs b h -> b s h)rw   rx   	output_wszb s h -> s b h)residual)shapereshaperu   r   rn   rv   rt   )rV   rC   rw   rx   ry   SBHx2dhidden_statesrn   attn2d	x_norm_2dx_after_add_2dx_normx_after_addmlp_outrM   rM   rN   r]      s$   


zQwen2_5_VisionBlock.forward)r2   NNr3   r   rf   FrY   )r^   r_   r`   ra   r   nnModuler   r   rc   floatrb   rQ   rT   rU   r]   rd   rM   rM   rW   rN   re      sN    	
+re   c                       s^   e Zd Z				ddedededee d	ed
eddf fddZde	j
de	j
fddZ  ZS )Qwen2_5_VisionPatchMergerr;   Nr3   FrG   context_dimspatial_merge_sizer7   r8   r9   rD   c           	         s   t    ||d  | _t|dd| _|rdnt }|rdnt }tt	| j| jd|t
d|||dt t| j|d|t
d	|||dg| _d S )
Nr;   rf   rl   r:   r   Tzmlp.0rB   zmlp.2)rP   rQ   hidden_sizer   ln_qr   r   r   
ModuleListr   r.   GELUr   rt   )	rV   rG   r   r   r7   r8   r9   r?   r@   rW   rM   rN   rQ      s6   
		
z"Qwen2_5_VisionPatchMerger.__init__rC   c                 C   s`   |j \}}}|d|}| |}|d| j}| j\}}}||\}	}
||	}	||	\}}
|S )NrE   )r{   r|   r   viewr   rt   )rV   rC   r}   r~   Dr   mlp_fc1mlp_actmlp_fc2
x_parallelr[   outrM   rM   rN   r]      s   
z!Qwen2_5_VisionPatchMerger.forward)r;   Nr3   F)r^   r_   r`   ra   r   r   rc   rb   rQ   rT   rU   r]   rd   rM   rM   rW   rN   r      s*    &r   c                       s   e Zd Z					ddededee ded	ed
ee	 ddf fddZ
dd ZedejfddZedejfddZdejdejfddZdejdejdejfddZdejdejdejfddZ  ZS )Qwen2_5_VisionTransformerrf   Nr3   Fvision_confignorm_epsr7   r8   r9   max_context_lenrD   c              
      s@  t    j}j}j}	|	| _|	|	 | _j}
j j}j	j
| _
j| _j| _jd d d | _j| _t|||
 d| _ttj|d  }t|d | _t fddt|D | _tj |	tdd	| _rd
nt | _|| _tot j!" | _#d | _$| j#rt%| | _$d S d S )N      )
patch_sizetemporal_patch_sizein_channelsro   rl   r;   c                    s2   g | ]}t  jtd | dqS )zblocks.)rG   rg   rh   rR   ri   r7   r8   r9   )re   rR   r.   ).0ir   mlp_hidden_sizeri   rh   r8   r7   r9   r   rM   rN   
<listcomp>2  s    z6Qwen2_5_VisionTransformer.__init__.<locals>.<listcomp>merger)rG   r   r   r7   r8   r9   r:   )&rP   rQ   r   r   r   spatial_merge_unitr   r   depthrh   fullatt_block_indexeswindow_sizeintermediate_sizer9   out_hidden_sizer   patch_embedr   r   	LayerNormr   rotary_pos_embr   rangeblocksr   r.   r   r   r?   r   _is_cudar   SGLANG_VIT_ENABLE_CUDA_GRAPHget	enable_cgcuda_graph_runnerr,   )rV   r   r   r7   r8   r9   r   r   r   r   r   r   head_dimrW   r   rN   rQ     s\   
	
z"Qwen2_5_VisionTransformer.__init__c                 C   sj  dg}d}| j | j | j }g }|D ]\}}}|| j || j }	}
t||	 |
 ||	|
}||	|  }||
|  }|	| | }|
| | }t|d|d|fdd}||||||}|ddddd||| ||}|dk	ddgd}|d}||dk }|
||  |d| j |d  }||  |||	 |
  7 }qtj|dd	}||fS )
Nr   constantir:      r;      rE   rF   )r   r   r   rT   aranger|   Fpadpermutesumappendcumsumr   extendtolistitemcat)rV   grid_thwcu_window_seqlenswindow_index_idvit_merger_window_sizewindow_indexgrid_tgrid_hgrid_w
llm_grid_h
llm_grid_windexpad_hpad_wnum_windows_hnum_windows_windex_paddedseqlens	index_newcu_seqlens_tmprM   rM   rN   get_window_indexT  sP   
z*Qwen2_5_VisionTransformer.get_window_indexc                 C      | j jjjS rY   )r   projweightdtyperV   rM   rM   rN   r        zQwen2_5_VisionTransformer.dtypec                 C   r   rY   )r   r   r   devicer   rM   rM   rN   r     r   z Qwen2_5_VisionTransformer.devicer   c           
      C   s   g }|D ]\}}}|  ||| j}||dkr|n||d qtj|dd}|d d dd f  }| |}|| d}	|	S )Nr:   r   rF   )	rot_pos_idsr   r   repeatrT   r   maxr   flatten)
rV   r   pos_idsthwbasemax_grid_sizerotary_pos_emb_fullr   rM   rM   rN   rot_pos_emb  s    
z%Qwen2_5_VisionTransformer.rot_pos_embrC   c                 C   sR  | j r	| ||S |j| j| jd}| |}| |}| |\}}tj	||jtj
d}t|}|j|jd}t|}|j|j|jd}| \}}||| j | jd}||d d d d f }||d}||| j | jd}||d d d d f }||d}tj||fdd}	|	 |	 f}
|
d |j|j|
d |j|jf}
ttj	dg|jtj
d|d d df |d d df  |d d df  jddj|jtj
dg}t|d|g}t r|d}|d}t| jD ]#\}}| j}t|tjr| }||v r|}n|}||||
d	}q| |}||d d f }|S )
Nr   r   r   rE   rF   r   r:   r;   cpu)rw   rx   )r   forward_with_cuda_graphtor   r   r   r   r   rT   tensorint32unique_consecutiver*   sizer|   r   r   cossinr   	new_zerosr0   	unsqueeze	enumerater   r   
isinstancerU   r   r   )rV   rC   r   r   r   r   reverse_indicesseq_lenr[   embrx   rw   	layer_numblkfullatt_indexescu_seqlens_nowrM   rM   rN   r]     sn   


.




z!Qwen2_5_VisionTransformer.forwardc                 C   s  |j | j| jd}| |}| |}| |\}}tj||jtjd}t	|}|j |jd}t
|}|j |j|jd}| \}}||| j | jd}||d d d d f }||d}||| j | jd}||d d d d f }||d}tj||fdd}	|	 |	 f}
|
d  |j|j|
d  |j|jf}
ttjdg|jtjd|d d df |d d df  |d d df  jddj |jtjdg}t|d|g}| jj||
|||dS )	Nr   r   rE   rF   r   r:   r;   )rC   rx   rw   r   output_indices)r   r   r   r   r   r   rT   r   r   r   r*   r   r|   r   r   r   r   r   r   r   run)rV   rC   r   r   r   r   r   r   r[   r   rx   rw   rM   rM   rN   r     sX   


.
z1Qwen2_5_VisionTransformer.forward_with_cuda_graph)rf   Nr3   FN)r^   r_   r`   r   r   r   r   rc   rb   ra   rQ   r   propertyrT   r   r   rU   r   r]   r   rd   rM   rM   rW   rN   r     sR    G-
Qr   c                       s  e Zd Zg dZddddddZdd	d
giZeddiddddddZ		d=dede	e
 deddf fddZdee defddZdee dejfdd Zed!Zd"edefd#d$Zdee dejfd%d&Zd'ee d(eej d)eej d*edejf
d+d,Zd-d. Ze 		/	d>dejd0ejd*ed1ed2e	e  f
d3d4Z!d5e"e#eejf  fd6d7Z$d8d9 Z%d?d:e	ee  fd;d<Z&  Z'S )@"Qwen2_5_VLForConditionalGeneration)z.gate_up_proj.z.down_proj.z.q_proj.z.k_proj.z.v_proj.z.o_proj.)qkv_projr   )r  r:   )r  r;   )r<   r   )r<   r:   )q_projk_projv_proj	gate_projup_projr<   r  r  zattn.qkvzattn.qkv_projzlanguage_model.model.zvisual.zlanguage_model.lm_head.)zmodel.language_model.zmodel.visual.zlm_head.zmodel.)orig_to_new_substrorig_to_new_prefixNr3   configr7   r8   rD   c                    s   t    t | _|| _t j| _| jjsJt	||t
d|d| _| jjrE| jjdkr4| jjr4| jj| _nt| jj| jj|t
d|d| _nt | _nd | _t|jt|dd|t
d|| j| jjd	| _d
| jjv | _t|| _ttj dd| _!d| _"d S )Nmodel)r8   r:   lm_head)r7   r8   rk   rf   visual)r   r7   r8   r9   r   mrope_sectionT)pooling_type	normalizeF)#rP   rQ   r   pp_groupr  r-   mm_enable_dp_encoderr9   encoder_onlyr'   r.   r  is_last_rank
world_sizetie_word_embeddingsembed_tokensr  r   
vocab_sizer   r   r   r   getattrmax_position_embeddingsr  rope_scalingis_mrope_enabledr   logits_processorr   r   LASTpoolercapture_aux_hidden_states)rV   r  r7   r8   rW   rM   rN   rQ   N  sB   






z+Qwen2_5_VLForConditionalGeneration.__init__	input_ids	mm_inputsc                 C   s   t  }|||S rY   )r   pad_input_tokens)rV   r'  r(  patternrM   rM   rN   pad_input_ids  s   z0Qwen2_5_VLForConditionalGeneration.pad_input_idsitemsc           	      C   s   t jdd |D dd| jj}t jdd |D dd}t| jdd}|dkr6| jj}t|dt|dd}d	}|	 d
krO|j
d }||krI|S ||krO|S |	 d
ks[J |	 |	 d
ksgJ |	 | jrut| j|| ddS | j||d}|S )Nc                 S      g | ]}|j qS rM   featurer   r   rM   rM   rN   r         zHQwen2_5_VLForConditionalGeneration.get_image_feature.<locals>.<listcomp>r   rF   c                 S   r-  rM   )image_grid_thwr0  rM   rM   rN   r     r1  ro   rE   r   i  r;   rope_3d	rope_typer   )rT   r   typer  r   concatr  r  r   rG   r{   r9   r+   r   )	rV   r,  pixel_valuesr2  expected_dimvision_confraw_patch_dimcurrent_dimimage_embedsrM   rM   rN   get_image_feature  s2   
z4Qwen2_5_VLForConditionalGeneration.get_image_featurezU^model\.layers\.(\d+)\.(?:self_attn|mlp)\.(?:qkv_proj|o_proj|down_proj|gate_up_proj)$module_namec                 C   s   t | j|S rY   )rb   _lora_patternmatch)rV   r@  rM   rM   rN   should_apply_lora  s   z4Qwen2_5_VLForConditionalGeneration.should_apply_lorac                 C   s   t jdd |D dd| jj}t jdd |D dd}| dks)J | | dks5J | | jrCt| j||	 ddS | j||d	}|S )
Nc                 S   r-  rM   r.  r0  rM   rM   rN   r     r1  zHQwen2_5_VLForConditionalGeneration.get_video_feature.<locals>.<listcomp>r   rF   c                 S   r-  rM   )video_grid_thwr0  rM   rM   rN   r     r1  r;   r3  r4  r6  )
rT   r   r7  r  r   r8  rG   r9   r+   r   )rV   r,  r9  rD  video_embedsrM   rM   rN   get_video_feature  s   z4Qwen2_5_VLForConditionalGeneration.get_video_feature
modalities
embeddingsindicesforward_batchc                 C   sH   g }t t|||D ]\}\}}	}
|	d u s|
d u rq
||	 q
||fS rY   )r   zipr   )rV   inputs_embedsrG  rH  rI  rJ  new_embeddingsr   modality	embeddingr   rM   rM   rN   post_process  s   	
z/Qwen2_5_VLForConditionalGeneration.post_processc                 C   s   | j jS rY   )r  r  r   rM   rM   rN   get_input_embeddings  s   z7Qwen2_5_VLForConditionalGeneration.get_input_embeddingsF	positionsget_embeddingpp_proxy_tensorsc           	      C   s   | j r|j}|j s'| r'| j r'|jdkr|ddks'J d|  t||| j| ||d}d}| j	r;|\}}| j
jrQ|sK| ||| j||S | ||S |S )a
  Run forward pass for Qwen2_5-VL.

        Args:
            input_ids: Flattened (concatenated) input_ids corresponding to a
                batch.
            positions: Flattened (concatenated) position ids corresponding to a
                batch.
                **NOTE**: If mrope is enabled (default setting for Qwen2-VL
                opensource models), the shape will be `(3, seq_len)`,
                otherwise it will be `(seq_len,).
                (Use input_metadata.mrope_positions to replace it)
        r;   r   r   zMmultimodal section rotary embedding requires (3, seq_len) positions, but got )r'  rJ  language_modelmultimodal_modelrR  rT  N)r"  mrope_positionsforward_mode	is_decodecontains_image_inputsndimr   r    r  r&  r  r  r#  r  r%  )	rV   r'  rR  rJ  input_embedsrS  rT  r   aux_hidden_statesrM   rM   rN   r]     sD   	z*Qwen2_5_VLForConditionalGeneration.forwardweightsc              	   C   s  g d}t | jdd}|D ]\}}d|v rq| jjr6| jjr6d|v r6d|v r6|d }t|dt}||| |D ]b\}}	}
|	|vrBq8d|v rOd	|vrOd
|vrOq8||	|}t	|}|d urut
| drut
| jdru|| jjk st|| jjkruq8|dr||vrq8| jjs| jjr||vrq8|| }|j}||||
  n>d|v r|dd}z|dr||vrW q|| v r|| }nW qW n ty   t|   w t|dt}||| qd S )N))	.qkv_projz.q_projq)r_  z.k_projk)r_  z.v_projv)r<   r  r:   )r<   r  r   F)remove_duplicatezrotary_emb.inv_freqzmodel.embed_tokens.weightzlm_head.weightweight_loaderr  r  r  r  start_layerz.biasz	attn.qkv.zattn.qkv_proj.)dictnamed_parametersr  r  r  r  r  r&   replacer   hasattrr  re  	end_layerendswithr  language_onlyrd  keysKeyErrorprint)rV   r^  stacked_params_mappingparams_dictnameloaded_weightlm_head_paramrd  
param_nameweight_nameshard_idlayer_idparamrM   rM   rN   load_weights  s|   



z/Qwen2_5_VLForConditionalGeneration.load_weightsc                 C   s   | j jj| jjfS rY   )r  r  r   r  r   rM   rM   rN   get_embed_and_headh  s   z5Qwen2_5_VLForConditionalGeneration.get_embed_and_head	layer_idsc                 C   sN   d| _ d| j_ |d u r| jj}d|d |d g| j_d S dd |D | j_d S )NTr;   r   c                 S   s   g | ]}|d  qS )r:   rM   )r   valrM   rM   rN   r   v  s    zSQwen2_5_VLForConditionalGeneration.set_eagle3_layers_to_capture.<locals>.<listcomp>)r&  r  r  num_hidden_layerslayers_to_capture)rV   r|  
num_layersrM   rM   rN   set_eagle3_layers_to_capturek  s   z?Qwen2_5_VLForConditionalGeneration.set_eagle3_layers_to_capture)Nr3   )NFNrY   )(r^   r_   r`   #default_bitsandbytes_target_modules#bitsandbytes_stacked_params_mappingpacked_modules_mappingr)   hf_to_sglang_mapperr
   r   r   rc   rQ   r   ra   r#   r+  r"   rT   rU   r?  recompilerA  rb   rC  rF  r!   r$   rP  rQ  no_gradr%   r]   r   r   rz  r{  r  rd   rM   rM   rW   rN   r  )  s    

7#
=Q r  )___doc__loggingr  	functoolsr   typingr   r   r   r   r   rT   torch.nnr   torch.nn.functional
functionalr   einopsr   transformers.activationsr	   7transformers.models.qwen2_5_vl.configuration_qwen2_5_vlr
   r   2transformers.models.qwen2_5_vl.modeling_qwen2_5_vlr   r   sglang.srt.distributedr   r   %sglang.srt.distributed.parallel_stater   sglang.srt.environr   sglang.srt.layers.activationr   "sglang.srt.layers.attention.visionr   sglang.srt.layers.layernormr   sglang.srt.layers.linearr   r   r   "sglang.srt.layers.logits_processorr   sglang.srt.layers.poolerr   r   *sglang.srt.layers.quantization.base_configr   sglang.srt.layers.utilsr   r   *sglang.srt.layers.vocab_parallel_embeddingr   sglang.srt.managers.mm_utilsr   r    "sglang.srt.managers.schedule_batchr!   r"   r#   ,sglang.srt.model_executor.forward_batch_infor$   r%   $sglang.srt.model_loader.weight_utilsr&   sglang.srt.models.qwen2r'   sglang.srt.models.utilsr(   r)   r*   sglang.srt.multimodal.mm_utilsr+   +sglang.srt.multimodal.vit_cuda_graph_runnerr,   sglang.srt.server_argsr-   sglang.srt.utilsr.   r/   r0   r   	getLoggerr^   loggerr   r1   re   r   r   r  
EntryClassrM   rM   rM   rN   <module>   s\   
5J5     
R