o
    پi>                     @   s  d Z ddlZddlmZ ddlmZmZmZ ddlZddl	m
Z
 ddlmZ ddlmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZ ddlmZ ddlm Z  ddl!m"Z" ddl#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/ ddl0m1Z1 ddl2m3Z3 ddl4m5Z5 e6e7Z8ee5Z9G dd de,Z:G dd de-Z;G dd de
j<Z=G dd de/Z>G dd  d e+Z?G d!d" d"e.Z@G d#d$ d$e*ZAeAgZBdS )%zAInference-only GLM-OCR model compatible with HuggingFace weights.    N)	lru_cache)IterableOptionalTuple)	rearrange)GlmOcrConfigGlmOcrVisionConfig)get_pp_group)vision_utils)VisionAttention)RMSNorm)LogitsProcessor)PoolerPoolingType)QuantizationConfig)get_rope)PPMissingLayer)ParallelLMHead)default_weight_loader)	Glm4Model)Glm4vForConditionalGenerationGlm4vPatchMergerGlm4vRMSNormGlm4vVisionMLPGlm4vVisionModelGlm4vVisionPatchEmbed)get_global_server_args)
add_prefix)get_processorc                   @      e Zd ZdS )GlmOcrRMSNormN__name__
__module____qualname__ r%   r%   M/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/glm_ocr.pyr    =       r    c                   @   r   )GlmOcrVisionMLPNr!   r%   r%   r%   r&   r(   A   r'   r(   c                       s   e Zd Z						ddeded	ed
ee dedededededdf fddZ	de
jde
jde
jde
jde
jf
ddZ  ZS )GlmOcrVisionBlockN Tr   h㈵>Fdimintermediate_dim	num_headsquant_configprefixattn_qkv_biasnum_dummy_headsrms_norm_epsuse_data_parallelreturnc
           
         sn   t    t||d| _t||d| _t|||d|ddd|td|||	d| _t||d|td||	d| _	d S )NepsTattn)	embed_dimr.   projection_sizeuse_qkv_parallelqkv_bias	proj_biasqk_normalization_by_head_sizeflatten_batchr/   r0   r2   r4   mlp)biasr/   r0   r4   )
super__init__r   norm1norm2r   r   r8   r(   r@   )
selfr,   r-   r.   r/   r0   r1   r2   r3   r4   	__class__r%   r&   rC   F   s2   
zGlmOcrVisionBlock.__init__x
cu_seqlensrotary_pos_emb_cosrotary_pos_emb_sinc                 C   s   |j \}}}|d|}| ||||}	t|	d}	| j|	|||d}
t|
d}
|
d|}| j||d\}}||||}||||}| |}|| }|S )Nzs b h -> b s hrJ   rK   rL   zb s h -> s b h)residual)shapereshaperD   r   r8   rE   r@   )rF   rI   rJ   rK   rL   SBHx2dhidden_statesr8   attn2d	x_norm_2dx_after_add_2dx_normx_after_addmlp_outr%   r%   r&   forwardl   s$   


zGlmOcrVisionBlock.forward)Nr*   Tr   r+   F)r"   r#   r$   intr   r   strboolfloatrC   torchTensorr]   __classcell__r%   r%   rG   r&   r)   E   sN    	
&r)   c                   @   r   )GlmOcrVisionPatchEmbedNr!   r%   r%   r%   r&   re      r'   re   c                   @   r   )GlmOcrVisionPatchMergerNr!   r%   r%   r%   r&   rf      r'   rf   c                       sZ   e Zd Z			ddedee dededdf
 fd	d
Zde	j
de	j
de	j
fddZ  ZS )GlmOcrVisionModelNr*   Fvision_configr/   r0   r4   r5   c           
         s  t    j}j}j}j}j_j_j_j_j	_	j
_
_t|||jd_jj }	t|	|	d dddd_t fddt|D _tj	j	j d	td
 d_tjjj	jjd_tjjd_d S )N)
patch_sizetemporal_patch_sizein_channelshidden_size   i    g     @T)	head_size
rotary_dimmax_positionbaseis_neox_stylec                    s:   g | ]}t jjjtd |  jjdqS )zblocks.)r,   r-   r.   r/   r0   r3   r1   r4   )r)   rl   intermediate_sizer.   r   r3   attention_bias).0	layer_idxr0   r/   rF   r4   rh   r%   r&   
<listcomp>   s    z.GlmOcrVisionModel.__init__.<locals>.<listcomp>Fmerger)d_modelcontext_dimr/   rA   r0   r4   )rk   out_channelskernel_sizestrider6   )rB   rC   ri   rj   rk   depthrl   r.   spatial_merge_sizeout_hidden_sizers   r4   re   patch_embedr   rotary_pos_embnn
ModuleListrangeblocksrf   r   ry   Conv2d
downsampler    r3   post_layernorm)
rF   rh   r/   r0   r4   ri   rj   rk   r   head_dimrG   rw   r&   rC      s`   
	zGlmOcrVisionModel.__init__rI   grid_thwc                 C   s   |j | j| jd}| |}| |\}}}t|d d df |d d df  |d d df jdtjd}t	|
d|g}tj	||gdd}tj	||gdd}|d}| jD ]
}|||||d}qZ| |}|d| j| j|jd }|dd	dd}| |d| j}| |}|S )
N)devicedtype   rm   r   )r,   r   rM   )r,   rN      )tor   r   r   rot_pos_embrb   repeat_interleavecumsumint32cat	new_zeros	unsqueezer   r   viewr   rP   permuter   r   ry   )rF   rI   r   rK   rL   image_type_idsrJ   blkr%   r%   r&   r]      s6   

,



zGlmOcrVisionModel.forward)Nr*   F)r"   r#   r$   r   r   r   r_   r`   rC   rb   rc   r]   rd   r%   r%   rG   r&   rg      s     $Hrg   c                	       sV   e Zd Z		ddedee deddf fddZdd
ee	ee
jf  fddZ  ZS )GlmOcrForConditionalGenerationNr*   configr/   r0   r5   c                    s   t  ||| t | _|| _t j| _t|j	|t
d|| jd| _t| j t||t
d|d| _| jjrX| jjdkrG| jjrG| jj| _nt| jj| jj|t
d|d| _nt | _d| jjv | _t|| _ttjdd	| _ d
| _!d S )Nvisual)rh   r/   r0   r4   model)r/   r0   r   lm_headmrope_sectionT)pooling_type	normalizeF)"rB   rC   r	   pp_groupr   r   mm_enable_dp_encoderr4   rg   rh   r   r   r
   "update_vit_attn_dummy_heads_configr   r   is_last_rank
world_sizetie_word_embeddingsembed_tokensr   r   
vocab_sizerl   r   rope_scalingis_mrope_enabledr   logits_processorr   r   LASTpoolercapture_aux_hidden_states)rF   r   r/   r0   rG   r%   r&   rC     s<   



z'GlmOcrForConditionalGeneration.__init__Fweightsc              	   C   s^  |r%t | jdr!| jj}|dksJ d| jjdkrdn| jj}ntdg d}|r4d| }g d}t| jd	d
}|D ]\}	}
d|	v rGq>d|	v rQ|	dd}	d|	v r[|	dd}	|st | jdr| jj}|dkr|	dr|		d}t
|dkrt|d | jjkrq>n,|	|sq>d|	v sd|	v rq>d}|D ]}||	v r|	|d}	d	} nq|r|	|d}	|D ]-\}}}||	vrq|	||}	|	dr|	|vrq|	|vrq||	 }|j}|||
|  nId|	v r|	dd}	z|	dr|	|vrW q>|	|vrW q>||	 }W n ty   t|   w t|dt}d|	v r't| j|	|
}
|||
 q>d S )Nnum_nextn_predict_layersr   zOnly 1 nextn layer is supportedr   z-num_nextn_predict_layers is not in the config))	.qkv_projz.q_projq)r   z.k_projk)r   z.v_projv).gate_up_projz.up_projr   )r   z
.gate_projr   zmodel.layers.)zshared_head.normeh_projenormhnormF)remove_duplicatezrotary_emb.inv_freqlanguage_modelzmodel.language_model.zmodel.zmodel.visual.zvisual.zmodel.layers.r   rm   zshared_head.headr   Tr   zmodel.decoderz.biasr   z	attn.qkv.zattn.qkv_proj.weight_loader)hasattrr   r   num_hidden_layers
ValueErrordictnamed_parametersreplace
startswithsplitlenr^   endswithr   KeyErrorprintkeysgetattrr   r
   pad_vit_attn_dummy_heads)rF   r   is_nextnnum_nextn_layersnextn_layer_idstacked_params_mappingnextn_layer_prefixnextn_spec_weight_namesparams_dictnameloaded_weight	name_list
is_decoderweight_name
param_nameshard_idparamr   r%   r%   r&   load_weights7  s   	




z+GlmOcrForConditionalGeneration.load_weights)Nr*   )F)r"   r#   r$   r   r   r   r_   rC   r   r   rb   rc   r   rd   r%   r%   rG   r&   r     s    &0r   )C__doc__logging	functoolsr   typingr   r   r   rb   torch.nnr   einopsr   1transformers.models.glm_ocr.configuration_glm_ocrr   r   %sglang.srt.distributed.parallel_stater	   sglang.srt.layers.attentionr
   "sglang.srt.layers.attention.visionr   sglang.srt.layers.layernormr   "sglang.srt.layers.logits_processorr   sglang.srt.layers.poolerr   r   *sglang.srt.layers.quantization.base_configr   "sglang.srt.layers.rotary_embeddingr   sglang.srt.layers.utilsr   *sglang.srt.layers.vocab_parallel_embeddingr   $sglang.srt.model_loader.weight_utilsr   sglang.srt.models.glm4r   sglang.srt.models.glm4vr   r   r   r   r   r   sglang.srt.server_argsr   sglang.srt.utilsr   &sglang.srt.utils.hf_transformers_utilsr   	getLoggerr"   loggercached_get_processorr    r(   Moduler)   re   rf   rg   r   
EntryClassr%   r%   r%   r&   <module>   sF    
Ip 
.