o
    پi                     @   s   d dl Z d dlmZmZmZmZ d dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlm Z  e !e"Z#G dd dej$Z%e%gZ&dS )    N)IterableListOptionalTuple)DotsOCRConfig)LogitsProcessor)QuantizationConfig)ParallelLMHead)/MultiModalityDataPaddingPatternMultimodalTokensgeneral_mm_embed_routine)MultimodalDataItemMultimodalInputs)ForwardBatch)default_weight_loader)DotsVisionTransformer)Qwen2ForCausalLM)
add_prefixc                
       s   e Zd Z		ddedee deddf fddZd	ee	 d
e
fddZdee dejfddZdedejfddZd	ejdejdededejf
ddZdeeeejf  fddZdd Z  ZS )DotsOCRForCausalLMN configquant_configprefixreturnc                    sd   t    || _t|j| _t||| _|jr| jj	| _
nt|j|j|td|d| _
t|| _d S )Nlm_head)r   r   )super__init__r   r   vision_configvisualr   modeltie_word_embeddingsembed_tokensr   r	   
vocab_sizehidden_sizer   r   logits_processor)selfr   r   r   	__class__ N/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/dots_ocr.pyr      s   
zDotsOCRForCausalLM.__init__	input_ids	mm_inputsc                 C   s   t  }|||S N)r
   pad_input_tokens)r$   r)   r*   patternr'   r'   r(   pad_input_ids;   s   z DotsOCRForCausalLM.pad_input_idsitemsc                 C   s   t jdd |D dd| jj}t jdd |D dd| jj}| dks1J d| | dks@J d| | ||}t	| j
d	r\| j
jjj}|j|kr\||}|S )
Nc                 S      g | ]}|j qS r'   )feature.0itemr'   r'   r(   
<listcomp>A       z8DotsOCRForCausalLM.get_image_feature.<locals>.<listcomp>r   dimc                 S   r0   r'   )image_grid_thwr2   r'   r'   r(   r5   E   r6      zpixel_values.dim()=zimage_grid_thw.dim()=r    )torchcattyper   dtypeconcattodevicer8   hasattrr   r    weight)r$   r/   pixel_valuesr9   image_embedstarget_dtyper'   r'   r(   get_image_feature?   s    


z$DotsOCRForCausalLM.get_image_featurenameloaded_weightc           	         s  | j jj}|dkr|S | j jjd|v r\|jddd\}}}|dr,||jd g n|dr6|g ntd|  fd	d
}||||||}}}tj	|||gdd}d|v rt|
|jd | }tj	||gdd}d|v s|d|v r|
| }tj	||gdd}|S )z$pad attn qkv weights for dummy headsr   zattn.qkv_proj   r7   z.weightz.biaszUnsupported weight with name=c                    s,   t j| ddf|  gddddS )Nr   rK   r7      )r;   r<   	unflatten	new_zerosflatten)xdummy_shapehead_dimr'   r(   <lambda>g   s    
z>DotsOCRForCausalLM._pad_vit_attn_dummy_heads.<locals>.<lambda>zattn.proj.weightzattn.q_norm.weightzattn.k_norm.weight)r   r   num_dummy_headsrS   chunkendswithshapeRuntimeErrorr;   r<   rN   )	r$   rH   rI   rU   wqwkwvpad_funcpadded_weightr'   rQ   r(   _pad_vit_attn_dummy_headsX   s.   




z,DotsOCRForCausalLM._pad_vit_attn_dummy_heads	positionsforward_batchkwargsc                 K   s   t |||| | jd}|S )N)r)   r`   ra   multimodal_modellanguage_model)r   r   )r$   r)   r`   ra   rb   hidden_statesr'   r'   r(   forwardv   s   zDotsOCRForCausalLM.forwardweightsc                 C   s   t |}g }g }|D ]\}}|dr!|dd}|||f q
|||f q
t|}t| jdd}| D ]+\}}|dd}||vrOtd| d	|| }	t|	d
t	}
| 
||}|
|	| q9|ro| j| dS dS )zBLoad weights for the model, separating vision and language weightszvision_tower.z	attn.qkv.zattn.qkv_proj.F)remove_duplicatevision_towerr   zWeight z not found in params_dictweight_loaderN)list
startswithreplaceappenddictnamed_parametersr/   
ValueErrorgetattrr   r_   r   load_weights)r$   rg   vision_weightslanguage_weightsrH   rI   vision_namevision_state_dictparams_dictparamrj   r'   r'   r(   rs      s*   
zDotsOCRForCausalLM.load_weightsc                 C   s   | j jj| jjfS r+   )r   r    rC   r   )r$   r'   r'   r(   get_embed_and_head   s   z%DotsOCRForCausalLM.get_embed_and_head)Nr   )__name__
__module____qualname__r   r   r   strr   r   intr   r.   r   r;   TensorrG   r_   r   objectrf   r   r   rs   rz   __classcell__r'   r'   r%   r(   r      s8    
!r   )'loggingtypingr   r   r   r   r;   torch.nnnnsglang.srt.configsr   "sglang.srt.layers.logits_processorr   *sglang.srt.layers.quantization.base_configr   *sglang.srt.layers.vocab_parallel_embeddingr	   sglang.srt.managers.mm_utilsr
   r   "sglang.srt.managers.schedule_batchr   r   ,sglang.srt.model_executor.forward_batch_infor   $sglang.srt.model_loader.weight_utilsr   sglang.srt.models.dots_vlm_vitr   sglang.srt.models.qwen2r   sglang.srt.utilsr   	getLoggerr{   loggerModuler   
EntryClassr'   r'   r'   r(   <module>   s&   
 
