o
    پiv                     @   sH  d Z ddlZddlmZ ddlmZmZmZmZ ddl	Z	ddl
mZ ddlm  mZ ddlmZ ddlmZmZ ddlmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlm Z m!Z! ddl"m#Z#m$Z$m%Z% ddl&m'Z' ddl(m)Z)m*Z* ddl+m,Z, ddl-m.Z. ddl/m0Z0 ddl1m2Z2 ddl3m4Z4m5Z5 ddl6m7Z7m8Z8 ddl9m:Z:m;Z; ddl<m=Z= ddl>m?Z? ddl@mAZA ddlBmCZC ddlDmEZEmFZF ddlGmHZH eIeJZKeeHZLG dd de!ZMG dd  d ejNZOG d!d" d"ejNZPG d#d$ d$ejNZQG d%d& d&ejNZRG d'd( d(ejNZSG d)d* d*ejNZTG d+d, d,ejNZUeUgZVdS )-zBInference-only GLM-4.1V model compatible with HuggingFace weights.    N)	lru_cache)IterableListOptionalTuple)	rearrange)Glm4vConfigGlm4vVisionConfig)get_tensor_model_parallel_rank$get_tensor_model_parallel_world_size)get_pp_group)
SiluAndMul)vision_utils)VisionAttention)	LayerNormRMSNorm)MergedColumnParallelLinearReplicatedLinearRowParallelLinear)LogitsProcessor)PoolerPoolingType)QuantizationConfig)get_rope)PPMissingLayer)ParallelLMHead)/MultiModalityDataPaddingPatternMultimodalTokensgeneral_mm_embed_routine)MultimodalDataItemMultimodalInputs)ForwardBatchPPProxyTensors)default_weight_loader)	Glm4Model)!run_dp_sharded_mrope_vision_model)get_global_server_args)
add_prefixis_npu)get_processorc                       s*   e Zd Zdejdejf fddZ  ZS )Glm4vRMSNormxreturnc                    s4   |j }| d|d }t |}||}|S N)shape
contiguousreshapesuperforward)selfr*   original_shapex_2d	__class__ K/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/glm4v.pyr2   E   s
   
zGlm4vRMSNorm.forward)__name__
__module____qualname__torchTensorr2   __classcell__r8   r8   r6   r9   r)   D   s    "r)   c                       sT   e Zd Z				ddedededee ded	ef fd
dZde	j
fddZ  ZS )Glm4vVisionMLPFN in_featureshidden_featuresbiasquant_configprefixuse_data_parallelc              	      s   t    |r	dnt | _|rdnt | _t||gd ||td|| j| jd| _t	||||td|| j| jd| _
t | _d S )N   r      gate_up_proj
input_sizeoutput_sizesrD   rE   rF   tp_sizetp_rank	down_projrD   rE   rF   rN   rO   )r1   __init__r   rN   r
   rO   r   r&   rJ   r   rP   r   act_fn)r3   rB   rC   rD   rE   rF   rG   r6   r8   r9   rR   N   s.   
			zGlm4vVisionMLP.__init__r*   c                 C   s*   |  |\}}| |}| |\}}|S N)rJ   rS   rP   )r3   r*   gate_up_r8   r8   r9   r2   p   s   
zGlm4vVisionMLP.forward)FNrA   F)r:   r;   r<   intboolr   r   strrR   r=   r>   r2   r?   r8   r8   r6   r9   r@   M   s&    "r@   c                       s   e Zd Z						ddeded	ed
ee dedededededdf fddZ	de
jde
jde
jde
jde
jf
ddZ  ZS )Glm4vVisionBlockNrA   Tr   h㈵>Fdimintermediate_dim	num_headsrE   rF   attn_qkv_biasnum_dummy_headsrms_norm_epsrG   r+   c
           
         sj   t    t||d| _t||d| _t|||dd|d|td|||	d| _t|||td||	d| _	d S )NepsTFattn)	embed_dimr^   projection_sizeuse_qkv_parallel	proj_biasqkv_biasflatten_batchrE   rF   r`   rG   mlprE   rF   rG   )
r1   rR   r   norm1norm2r   r&   rd   r@   rk   )
r3   r\   r]   r^   rE   rF   r_   r`   ra   rG   r6   r8   r9   rR   x   s.   
zGlm4vVisionBlock.__init__r*   
cu_seqlensrotary_pos_emb_cosrotary_pos_emb_sinc                 C   s   |j \}}}|d|}| ||||}	t|	d}	| j|	|||d}
t|
d}
|
d|}| j||d\}}||||}||||}| |}|| }|S )Nr-   zs b h -> b s hro   rp   rq   zb s h -> s b h)residual)r.   r0   rm   r   rd   rn   rk   )r3   r*   ro   rp   rq   SBHx2dhidden_statesrd   attn2d	x_norm_2dx_after_add_2dx_normx_after_addmlp_outr8   r8   r9   r2      s$   


zGlm4vVisionBlock.forward)NrA   Tr   r[   F)r:   r;   r<   rW   r   r   rY   rX   floatrR   r=   r>   r2   r?   r8   r8   r6   r9   rZ   w   sN    	
%rZ   c                       sR   e Zd Z				ddedededed	d
f
 fddZdejd	ejfddZ  ZS )Glm4vVisionPatchEmbed   rI         
patch_sizetemporal_patch_sizein_channelshidden_sizer+   Nc                    sF   t    || _|| _|| _|| _|||f}tj||||dd| _d S )NT)kernel_sizestriderD   )	r1   rR   r   r   r   r   nnConv3dproj)r3   r   r   r   r   r   r6   r8   r9   rR      s   

zGlm4vVisionPatchEmbed.__init__r*   c                 C   s2   | d| j| j| j| j}| | d| j}|S r,   )viewr   r   r   r   r   )r3   r*   r8   r8   r9   r2      s   zGlm4vVisionPatchEmbed.forward)r   rI   r   r   )	r:   r;   r<   rW   rR   r=   r>   r2   r?   r8   r8   r6   r9   r      s"    r   c                       sX   e Zd Z				ddededee deded	ed
df fddZde	j
fddZ  ZS )Glm4vPatchMergerNFrA   d_modelcontext_dimrE   rD   rF   rG   r+   c           	   	      s   t    || _|rdnt }|rdnt }t| j| j||td|d| _t| j| _	t
| j|gd ||td|||d| _t|| j||td|||d	| _t | _d S )
NrH   r   r   )rD   rE   rF   rI   rJ   rK   rP   rQ   )r1   rR   r   r   r
   r   r&   r   r   post_projection_normr   rJ   r   rP   r   GELUextra_activation_func)	r3   r   r   rE   rD   rF   rG   rN   rO   r6   r8   r9   rR      s>   
			zGlm4vPatchMerger.__init__r*   c                 C   s^   |  |\}}| | |}| |\}}|jddd\}}t|| }| |\}}|S )NrI   r-   r\   )r   r   r   rJ   chunkFsilurP   )r3   r*   rV   rU   gateupr8   r8   r9   r2     s   zGlm4vPatchMerger.forward)NFrA   F)r:   r;   r<   rW   r   r   rX   rY   rR   r=   r>   r2   r?   r8   r8   r6   r9   r      s*    )r   c                       s2   e Zd Zdef fddZdejfddZ  ZS )Glm4vVisionEmbeddingsconfigc                    sv   t    || _|j| _|j| _|j| _| j| j d | _| j| _t	
| j| j| _| jdt| jddd d S )NrI   position_ids)rH   r-   F)
persistent)r1   rR   r   r   re   
image_sizer   num_patchesnum_positionsr   	Embeddingposition_embeddingregister_bufferr=   arangeexpand)r3   r   r6   r8   r9   rR     s   

zGlm4vVisionEmbeddings.__init__r+   c                    s  | j j}|jd }|jd }|j}	||	||	}}|dkr+tjd||	|jd}
ntt	r9tj
|	tjdt tjsHtj
 |	tjd |jd }t|d }||||ddddj|	tjd}t fddttD j|	tjd}t fddttD j|	tjd}|j|	tjd}|j|	tjd}|d | d d }|d | d d }tj||fd	d
dd}tj||dddd}|dd	dd}||j|j}
||
 }|S )NrH   r   devicedtypeg      ?rI   c                    "   g | ]} |d f  | qS )rH   repeat.0iimage_shapeslengthsr8   r9   
<listcomp>L     " z1Glm4vVisionEmbeddings.forward.<locals>.<listcomp>c                    r   )rI   r   r   r   r8   r9   r   O  r   r-   r   bicubicFborder)modealign_cornerspadding_mode)r   weightr.   r   tor=   emptyr   
isinstancelisttensorlongr>   rW   r   permute	unsqueezefloat32catrangelenstackr   grid_samplesqueeze)r3   
embeddingsr   r   h_coordsw_coordspos_embed_weightr   	total_seqr   adapted_pos_embedorig_size_sq	orig_sizepos_embed_2dtarget_htarget_wnorm_wnorm_hgridinterpolated_embed_fp32adapted_pos_embed_fp32r8   r   r9   r2   '  sh   






zGlm4vVisionEmbeddings.forward)	r:   r;   r<   r	   rR   r=   r>   r2   r?   r8   r8   r6   r9   r     s
    r   c                       s   e Zd Z			ddedee dededdf
 fd	d
Ze	de
jfddZe	de
jfddZde
jdee
je
je
jf fddZde
jde
jde
jfddZ  ZS )Glm4vVisionModelNrA   Fvision_configrE   rF   rG   r+   c           
         s  t    j}j}j}j}j_j_j_j_j	_	_
t|||jd_jj }	t|	|	d dddd_t fddt|D _tj	jd	td
 d_t_tjjd_tjjj	jjd_tjjd_d S )N)r   r   r   r   rI   i    g     @T)	head_size
rotary_dimmax_positionbaseis_neox_stylec                    s:   g | ]}t jjjtd |  jjdqS )zblocks.)r\   r]   r^   rE   rF   ra   r_   rG   )rZ   r   out_hidden_sizer^   r&   ra   attention_bias)r   	layer_idxrF   rE   r3   rG   r   r8   r9   r     s    z-Glm4vVisionModel.__init__.<locals>.<listcomp>Fmerger)r   r   rE   rD   rF   rG   rb   )r   out_channelsr   r   )r1   rR   r   r   r   depthr   r^   spatial_merge_sizer   rG   r   patch_embedr   rotary_pos_embr   
ModuleListr   blocksr   intermediate_sizer&   r   r   r   r)   ra   post_conv_layernormConv2d
downsamplepost_layernorm)
r3   r   rE   rF   rG   r   r   r   r   head_dimr6   r   r9   rR   r  sf   

	zGlm4vVisionModel.__init__c                 C      | j jjjS rT   )r   r   r   r   r3   r8   r8   r9   r        zGlm4vVisionModel.dtypec                 C   r   rT   )r   r   r   r   r   r8   r8   r9   r     r   zGlm4vVisionModel.devicegrid_thwc                 C   s,  g }|D ]]\}}}t |dd|}t |d|d}||| j | j|| j | jdddd }||| j | j|| j | jdddd }|t j	||gdd
|d qt j|ddj| jdd}|d d dd f  }| j|\}	}
|	| d}|
| d}|||fS )	NrH   r-   r   rI   r   r   T)non_blocking)r=   r   r   r   r0   r   r   flattenappendr   r   r   r   r   maxr   get_cos_sin)r3   r   pos_idsthwhpos_idswpos_idsmax_grid_sizecossincos_combinedsin_combinedr8   r8   r9   rot_pos_emb  s:   "

zGlm4vVisionModel.rot_pos_embr*   c           	   	   C   s  |j | j| jd}| |}| |}| |\}}}t|d d df |d d df  |d d df jdtj	d}t
|d|g}|dd  |d d   }| ||||d d df |d d df }tj
||gdd}tj
||gdd}t r| d}|d}| jD ]
}|||||d	}q| |}|d| j| j|jd }|dd
dd}| |d| j}| |}|S )Nr   rH   rI   r   )r\   r   r-   r   cpurr   r   )r   r   r   r   r   r  r=   repeat_interleavecumsumint32r   	new_zerostolistr   r'   r   r   r   r   r   r.   r   r   r   r   )	r3   r*   r   rp   rq   image_type_idsro   seqlensblkr8   r8   r9   r2     sD   


,"




zGlm4vVisionModel.forward)NrA   F)r:   r;   r<   r	   r   r   rY   rX   rR   propertyr=   r   r   r>   tupler  r2   r?   r8   r8   r6   r9   r   q  s2    M
$&r   c                       s   e Zd Z		d&dedee deddf fddZd	ee	 d
e
fddZdee dejfddZdee dejfddZdd Ze 		d'd	ejdejdededee f
ddZdedejfddZdeeeejf  fd d!Zd"d# Zd$d% Z  ZS )(Glm4vForConditionalGenerationNrA   r   rE   rF   r+   c                    s   t    t | _|| _t j| _t|j	|t
d|| jd| _t| j t||t
d|d| _| jjrU| jjdkrD| jjrD| jj| _nt| jj| jj|t
d|d| _nt | _d| jjv | _t|| _ttjdd	| _ d
| _!d S )Nvisualrl   model)rE   rF   rH   lm_headmrope_sectionT)pooling_type	normalizeF)"r1   rR   r   pp_groupr   r%   mm_enable_dp_encoderrG   r   r   r&   r  r   "update_vit_attn_dummy_heads_configr#   r  is_last_rank
world_sizetie_word_embeddingsembed_tokensr  r   
vocab_sizer   r   rope_scalingis_mrope_enabledr   logits_processorr   r   LASTpoolercapture_aux_hidden_states)r3   r   rE   rF   r6   r8   r9   rR     s<   




z&Glm4vForConditionalGeneration.__init__	input_ids	mm_inputsc                 C   s   t  }|||S rT   )r   pad_input_tokens)r3   r'  r(  patternr8   r8   r9   pad_input_idsO  s   z+Glm4vForConditionalGeneration.pad_input_idsitemsc                 C   s   t jdd |D dd| jj}t jdd |D dd}| dks)J | | dks5J | | jrCt| j||	 ddS | j||d	}|S )
Nc                 S      g | ]}|j qS r8   featurer   itemr8   r8   r9   r   U      zCGlm4vForConditionalGeneration.get_image_feature.<locals>.<listcomp>r   r   c                 S   r-  r8   )image_grid_thwr0  r8   r8   r9   r   X  r2  rI   rope_3d	rope_typer   )
r=   r   typer  r   concatr\   rG   r$   r  )r3   r,  pixel_valuesr3  image_embedsr8   r8   r9   get_image_featureS  s   z/Glm4vForConditionalGeneration.get_image_featurec                 C   s   t jdd |D dd| jj}t jdd |D dd}g }|D ]\}}}t d| | gd	|d}|
| q!t j|dd}	| dksRJ | | dks^J | | jrlt| j||	 dd	S | j||	d
}
|
S )Nc                 S   r-  r8   r.  r0  r8   r8   r9   r   e  r2  zCGlm4vForConditionalGeneration.get_video_feature.<locals>.<listcomp>r   r   c                 S   r-  r8   )video_grid_thwr0  r8   r8   r9   r   h  r2  rH   rI   r4  r5  r7  )r=   r   r8  r  r   r9  r   r1  r   r   r   r\   rG   r$   r  )r3   r,  r:  r=  temp_frames_hwr   r   r   repeated_rowflattened_video_grid_thwvideo_embedsr8   r8   r9   get_video_featurec  s*   $z/Glm4vForConditionalGeneration.get_video_featurec                 C   s   | j jS rT   )r  r  r   r8   r8   r9   get_input_embeddings  s   z2Glm4vForConditionalGeneration.get_input_embeddingsF	positionsforward_batchget_embeddingpp_proxy_tensorsc                 C   s   | j r|j}|j s'| r'| j r'|jdkr|ddks'J d|  t||| j| ||d}d}| j	r;|\}}| j
jrP|sJ| ||| j|S | ||S |S )a  Run forward pass for GLM-4.1V.

        Args:
            input_ids: Flattened (concatenated) input_ids corresponding to a
                batch.
            positions: Flattened (concatenated) position ids corresponding to a
                batch.
                **NOTE**: If mrope is enabled (default setting for GLM-4.1V
                opensource models), the shape will be `(3, seq_len)`,
                otherwise it will be `(seq_len,).
                (Use input_metadata.mrope_positions to replace it)
        rI   r   r   zMmultimodal section rotary embedding requires (3, seq_len) positions, but got )r'  rE  language_modelmultimodal_modelrD  rG  N)r"  mrope_positionsforward_mode	is_decodecontains_image_inputsndimsizer   r  r&  r  r  r#  r  r%  )r3   r'  rD  rE  rF  rG  rx   aux_hidden_statesr8   r8   r9   r2     sB   	z%Glm4vForConditionalGeneration.forwardnameloaded_weightc           	         s$  | j jj}|dkr|S | j jjd|v r^|jddd\}}}|dr,||jd g n|dr6|g ntd|  fd	d
}||||||}}}tj	|||gdd}|S d|v rx|
|jd | }tj	||gdd}|S d|v sd|v r|
| }tj	||gdd}|S )z$pad attn qkv weights for dummy headsr   zattn.qkv_projr   r   z.weightr-   .biaszUnsupported weight with name=c                    s,   t j| ddf|  gddddS )Nr   r-   r   rH   )r=   r   	unflattenr  r   )r*   dummy_shaper   r8   r9   <lambda>  s    
zIGlm4vForConditionalGeneration._pad_vit_attn_dummy_heads.<locals>.<lambda>zattn.proj.weightzattn.q_norm.weightzattn.k_norm.weight)r   r   r`   r   r   endswithr.   RuntimeErrorr=   r   r  )	r3   rQ  rR  r`   wqwkwvpad_funcpadded_weightr8   rU  r9   _pad_vit_attn_dummy_heads  s2   




	z7Glm4vForConditionalGeneration._pad_vit_attn_dummy_headsweightsc              	   C   sJ  g d}t | jdd}|D ]\}}d|v rqd|v r!|dd}d|v r+|dd	}|D ]-\}}}||vr7q-|||}|d
rG||vrGq-||vrLq-|| }	|	j}
|
|	||  nGd|v re|dd}z|d
rq||vrqW q||vrwW q|| }	W n ty   t|   w t|	dt	}
d|v rt
| j||}|
|	| qd S )N))	.qkv_projz.q_projq)ra  z.k_projk)ra  z.v_projv).gate_up_projz.up_projrH   )re  z
.gate_projr   F)remove_duplicatezrotary_emb.inv_freqrH  zmodel.language_model.zmodel.zmodel.visual.zvisual.rS  r  z	attn.qkv.zattn.qkv_proj.weight_loader)dictnamed_parametersreplacerX  rg  KeyErrorprintkeysgetattrr"   r   pad_vit_attn_dummy_headsr   )r3   r`  stacked_params_mappingparams_dictrQ  rR  
param_nameweight_nameshard_idparamrg  r8   r8   r9   load_weights  sR   
z*Glm4vForConditionalGeneration.load_weightsc                 C   s   | j jj| jjfS rT   )r  r  r   r  r   r8   r8   r9   get_embed_and_head#  s   z0Glm4vForConditionalGeneration.get_embed_and_headc                 C   sL   | j j`|| j j_| jjr| j j| _n| j`|| j_tj  tj	  d S rT   )
r  r  r   r   r  r  r=   cudaempty_cachesynchronize)r3   embedheadr8   r8   r9   set_embed_and_head&  s   

z0Glm4vForConditionalGeneration.set_embed_and_head)NrA   )FN)r:   r;   r<   r   r   r   rY   rR   r   rW   r   r+  r   r=   r>   r<  rB  rC  no_gradr    rX   r!   r2   r_  r   r   rv  rw  r}  r?   r8   r8   r6   r9   r    sD    0;Fr  )W__doc__logging	functoolsr   typingr   r   r   r   r=   torch.nnr   torch.nn.functional
functionalr   einopsr   -transformers.models.glm4v.configuration_glm4vr   r	   sglang.srt.distributedr
   r   %sglang.srt.distributed.parallel_stater   sglang.srt.layers.activationr   sglang.srt.layers.attentionr   "sglang.srt.layers.attention.visionr   sglang.srt.layers.layernormr   r   sglang.srt.layers.linearr   r   r   "sglang.srt.layers.logits_processorr   sglang.srt.layers.poolerr   r   *sglang.srt.layers.quantization.base_configr   "sglang.srt.layers.rotary_embeddingr   sglang.srt.layers.utilsr   *sglang.srt.layers.vocab_parallel_embeddingr   sglang.srt.managers.mm_utilsr   r   "sglang.srt.managers.schedule_batchr   r   ,sglang.srt.model_executor.forward_batch_infor    r!   $sglang.srt.model_loader.weight_utilsr"   sglang.srt.models.glm4r#   sglang.srt.multimodal.mm_utilsr$   sglang.srt.server_argsr%   sglang.srt.utilsr&   r'   &sglang.srt.utils.hf_transformers_utilsr(   	getLoggerr:   loggercached_get_processorr)   Moduler@   rZ   r   r   r   r   r  
EntryClassr8   r8   r8   r9   <module>   sZ   
	*H#4[ .  
