o
    
۾i3                     @   s  d Z ddlmZ ddlmZ ddlmZ ddlZddlm	Z	 ddl
mZ er,ddlmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZmZ ddl m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/ ddl0m1Z1 ddlm2Z2 ddl3m4Z4m5Z5 ee6Z7G dd de-Z8G dd de	j9Z:G dd de,Z;G dd de.Z<G d d! d!e*Z=G d"d# d#e/Z>e1j?e)e+e'd$G d%d& d&e(Z@dS )'zAInference-only GLM-OCR model compatible with HuggingFace weights.    )Callable)partial)TYPE_CHECKINGN)	rearrange)GlmOcrVisionConfig)
VllmConfig)$get_tensor_model_parallel_world_sizeparallel_state)utils)init_logger)MMEncoderAttention)Conv2dLayer)RMSNorm)QKVParallelLinearRowParallelLinear)QuantizationConfig)get_rope)ApplyRotaryEmb)	Glm4vDummyInputsBuilderGlm4vForConditionalGenerationGlm4vMultiModalProcessorGlm4vPatchMergerGlm4vProcessingInfoGlm4vVisionBlockGlm4vVisionMLPGlm4vVisionPatchEmbedGlm4vVisionTransformer)MULTIMODAL_REGISTRY   )maybe_prefix)get_vit_attn_backendis_vit_use_data_parallelc                   @      e Zd ZdS )GlmOcrVisionMLPN__name__
__module____qualname__ r(   r(   V/home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/glm_ocr.pyr#   R       r#   c                       s   e Zd Z		ddededededB deddf fd	d
Zdejde	ejdf fddZ
	ddejdejdejdejdejdB dejfddZ  ZS )GlmOcrVisionAttentionN 	embed_dim	num_headsprojection_sizequant_configprefixreturnc              
      s   t    t }|rdnt | _|rdnt | _t	||| _
t	|| j| _|| | _t| jdd| _t| jdd| _t|| j
||d||rL| dn| d|d| _t|||| d	d|d
| _t| j| j
| j
d | dd| _tdd| _d S )Nr   r   h㈵>epsTz	.qkv_projz.qkv)hidden_size	head_sizetotal_num_headstotal_num_kv_headsbiasr0   r1   
disable_tpz.proj)
input_sizeoutput_sizer0   r1   r:   r;   g      .attn)r.   r7   scaler1   )enforce_enable)super__init__r!   r   tp_sizer	   get_tensor_model_parallel_ranktp_rank
dist_utilsdividehidden_size_per_attention_head!num_attention_heads_per_partitionhead_dimr   q_normk_normr   qkvr   projr   attnr   apply_rotary_emb)selfr-   r.   r/   r0   r1   use_data_parallel	__class__r(   r)   rB   W   sP   

	zGlmOcrVisionAttention.__init__rM   .c                    sX   |j \}}}|jddd\}}}||| j| jf  fdd|||fD \}}}|||fS )N      dimc                 3   s    | ]}|j   V  qd S N)view.0x	new_shaper(   r)   	<genexpr>   s    z2GlmOcrVisionAttention.split_qkv.<locals>.<genexpr>)shapechunkrI   rH   )rQ   rM   seq_lenbs_qkvr(   r^   r)   	split_qkv   s   
zGlmOcrVisionAttention.split_qkvr]   
cu_seqlensrotary_pos_emb_cosrotary_pos_emb_sin
max_seqlenc                 C   s   |  |\}}| |\}}}	|j|j}
}| |d| j|
}| |d| j|}dd |||	fD \}}}	|d ur_|d ur_tj	||gdd}| 
|||}tj|ddd\}}| j|||	||d}t|d }| |\}}|S )	Nc                 s   s    | ]
}t |d  V  qdS )zs b ... -> b s ...N)r   
contiguousr[   r(   r(   r)   r`      s    z0GlmOcrVisionAttention.forward.<locals>.<genexpr>r   rW   rV   )querykeyvaluerj   rm   zb s h d -> s b (h d))rM   ri   ra   rK   reshaperJ   rZ   rL   torchcatrP   rb   rO   r   ro   rN   )rQ   r]   rj   rk   rl   rm   re   rf   rg   rh   q_shapek_shape	qk_concat
qk_rotatedcontext_layeroutputr(   r(   r)   forward   s0   	zGlmOcrVisionAttention.forward)Nr,   rY   )r%   r&   r'   intr   strrB   rt   Tensortupleri   r|   __classcell__r(   r(   rS   r)   r+   V   s>    9r+   c                       sV   e Zd Z			ddedededeegejf dB dedB ded	df fd
dZ	  Z
S )GlmOcrVisionBlockNr,   rX   r.   mlp_hidden_dim
norm_layerr0   r1   r2   c                    sx   t  |||||| |d u rttjdd}||| _||| _t||||| dd| _t	||d|| dd| _
d S )Ngư>r4   r>   )r-   r.   r/   r0   r1   Tz.mlp)r:   r0   r1   )rA   rB   r   nn	LayerNormnorm1norm2r+   rO   r#   mlp)rQ   rX   r.   r   r   r0   r1   rS   r(   r)   rB      s4   	

zGlmOcrVisionBlock.__init__)NNr,   )r%   r&   r'   r}   r   r   Moduler   r~   rB   r   r(   r(   rS   r)   r      s&    r   c                   @   r"   )GlmOcrVisionPatchEmbedNr$   r(   r(   r(   r)   r      r*   r   c                   @   r"   )GlmOcrPatchMergerNr$   r(   r(   r(   r)   r      r*   r   c                       sf   e Zd Z			ddddededB ded	df
 fd
dZdejdeje	e	e
  B d	ejfddZ  ZS )GlmOcrVisionTransformerr3   Nr,   vision_configr   norm_epsr0   r1   r2   c           
         s(  t  | ``j}j}j}j}j_j	_	j_j
_
j_t|||jd_tt|d jj	 }	t|	ddddid_t fdd	t|D _tjjj d
 dd_tjjj
j
d_tjjd_t|	t d_d S )N)
patch_sizetemporal_patch_sizein_channelsr6   r4   i    Tpartial_rotary_factorg      ?)r7   max_positionis_neox_stylerope_parametersc                    s0   g | ]}t jjj  d | dqS )z.blocks.)rX   r.   r   r   r0   r1   )r   r6   r.   intermediate_size)r\   	layer_idxr   r1   r0   rQ   r   r(   r)   
<listcomp>"  s    	z4GlmOcrVisionTransformer.__init__.<locals>.<listcomp>Fz.merger)d_modelcontext_dimr0   r:   r1   )r   out_channelskernel_sizestride)r7   dtype) rA   rB   post_conv_layernorm
embeddingsr   r   r   depthr6   r.   spatial_merge_sizeout_hidden_sizer   patch_embedr   r   r   rotary_pos_embr   
ModuleListrangeblocksr   mergerr   
downsamplerms_norm_epspost_layernormr    rt   get_default_dtypeattn_backend)
rQ   r   r   r0   r1   r   r   r   r   rJ   rS   r   r)   rB      sf   	
z GlmOcrVisionTransformer.__init__r]   grid_thwc           	      C   s2  t |trtj|tjd}|j| j| jd}| |}| 	|\}}}t
|d d df |d d df  |d d df jdtjd}t|d|g}|j| jdd}| |}|d}| jD ]}||||||d	}qb| |}|d
| j| j|jd
 }|dddd}| |d
| j}| |}|S )N)r   )devicer   r   rV   r   )rX   r   T)non_blocking)rj   rk   rl   rm   rn   rU   )
isinstancelistrt   tensorint32tor   r   r   rot_pos_embrepeat_interleavecumsumru   	new_zeroscompute_attn_mask_seqlen	unsqueezer   r   rZ   r   ra   permuter   r   r   )	rQ   r]   r   rk   rl   image_type_idsrj   rm   blkr(   r(   r)   r|   E  s<   


,



	
zGlmOcrVisionTransformer.forward)r3   Nr,   )r%   r&   r'   floatr   r~   rB   rt   r   r   r}   r|   r   r(   r(   rS   r)   r      s,    Jr   )infodummy_inputsc                       s,   e Zd Zdddedef fddZ  ZS )GlmOcrForConditionalGenerationr,   )r1   vllm_configr1   c                   sv   t  j||d |jj}|j}| |ddh t|jt|dd|t	|dd| _
W d    d S 1 s4w   Y  d S )N)r   r1   imagevideor   r3   visual)r   r0   r1   )rA   rB   model_config	hf_configr0   _mark_tower_modelr   r   getattrr   r   )rQ   r   r1   configr0   rS   r(   r)   rB   {  s   

"z'GlmOcrForConditionalGeneration.__init__)r%   r&   r'   r   r~   rB   r   r(   r(   rS   r)   r   u  s    $r   )A__doc__collections.abcr   	functoolsr   typingr   rt   torch.nnr   einopsr   1transformers.models.glm_ocr.configuration_glm_ocrr   vllm.configr   vllm.distributedr   r	   r
   rF   vllm.loggerr   9vllm.model_executor.layers.attention.mm_encoder_attentionr   vllm.model_executor.layers.convr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   2vllm.model_executor.layers.rotary_embedding.commonr   "vllm.model_executor.models.glm4_1vr   r   r   r   r   r   r   r   r   vllm.multimodalr   r   visionr    r!   r%   loggerr#   r   r+   r   r   r   r   register_processorr   r(   r(   r(   r)   <module>   sJ   ,v&{