o
    پi3                     @   s   d dl mZmZmZmZ d dlZd dlm  mZ	 d dl
mZmZ d dlmZ d dlmZmZ d dlmZ d dlmZ d dlmZmZ d d	lmZmZ d d
lmZ d dlmZ d dlmZ d dl m!Z! G dd dej"Z#G dd dej"Z$e$Z%dS )    )IterableListOptionalTupleN)	rearrangerepeat)nn)DeepseekVL2ConfigDeepseekVL2MlpProjectorConfig)ReplicatedLinear)QuantizationConfig)/MultiModalityDataPaddingPatternMultimodalTokensgeneral_mm_embed_routine)MultimodalDataItemMultimodalInputs)ForwardBatch)default_weight_loader)DeepseekForCausalLM)DeepseekV2ForCausalLMc                       s6   e Zd Z	ddedee f fddZdd Z  ZS )	DeepseekVL2MlpProjectorNconfigquant_configc                    s  t    || _|jdkrt }n|jdkr&tt|j|j	|dg| _
n|jdkrZ|j}tt|j|j	|dg| _
td|D ]}| j
t  | j
t|j	|j	|d qAnd|jdkr|j}|j}tt|j|j |j |j	| |dg| _
td|d D ]}| j
t  | j
t|j	| |j	| |d q| j
t  | j
t|j	| |j	|d ntd|j |jrt|jd |j|d| _d S d S )	Nidentitylinear)r   mlp_gelu   downsample_mlp_geluzUnknown projector type:    )super__init__r   projector_typer   Identity
ModuleListr   	input_dimn_embedlayersdepthrangeappendGELU	mlp_ratiodownsample_ratio
ValueErrortoken_poolingtoken_pooling_layer)selfr   r   modules	mlp_depth_r*   	__class__ R/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/deepseek_vl2.pyr      s   





	

	z DeepseekVL2MlpProjector.__init__c              	   C   s  | j jra|j\}}}t|d  }}|||||}|dddd}|dddddd}| \}}}}	}
}
| ||||	 d}|dddd }||||	 |d }| 	|d }n_| j j
dkr|j\}}}t|d  }}	 || j j r| j j|| j j  }nd}|||||}|dkrt|ddd|d|fd	d}	 |dddd}tj|| j j| j jdd
}|ddd}| jD ]}||}t|tr|d }q|S )N      ?r      r      r   r   constant)kernel_sizestridepadding)r   r-   shapeintviewpermuteunfoldsize
contiguousr.   r    r+   reshapeFpadr%   
isinstancetuple)r/   x
batch_sizewxhchannelswhpatches	h_patches	w_patchesr2   bshwr#   rH   layerr5   r5   r6   forwardo   sL   

zDeepseekVL2MlpProjector.forwardN)	__name__
__module____qualname__r
   r   r   r   rW   __classcell__r5   r5   r3   r6   r      s    Ur   c                       s   e Zd Z	ddedee f fddZdee dejfddZ	d	e
jd
e
jdedefddZdeeee
jf  fddZd	ee defddZdee fddZ  ZS )DeepseekVL2ForCausalLMNr   r   c                    s   t    |j}| ||| _|j}t||| _|j| _|j	| _	dt
t
j|jt
jd }| jdkrLtt
|j| | _tt
|j| | _ntd| j |j}|jrat|| _d S t|| _d S )Nr   dtype2Dztile tag should be 2D, but got )r   r   vision_config_init_vision_modulevisionprojector_configr   	projectortile_tagglobal_view_postorchsqrttensorr$   float32r   	Parameterrandnimage_newlineview_seperatorr,   language_configuse_mlar   language_modelr   )r/   r   r   ra   rd   	embed_stdrp   r3   r5   r6   r      s,   

zDeepseekVL2ForCausalLM.__init__returnc                 C   sN   zdd l }W n ty   tdtw |jdddddd}|jt d}|S )Nr   zPlease install timmz#vit_so400m_patch14_siglip_384.webliFT)
pretrainednum_classesdynamic_img_sizedynamic_img_padr^   )timmImportErrorcreate_modeltorh   get_default_dtype)r/   ra   r   ry   modelr5   r5   r6   rb      s   
z*DeepseekVL2ForCausalLM._init_vision_module	input_ids	positionsforward_batchkwargsc                 K   s   t |||| | jd}|S )N)r   r   r   multimodal_modelrr   )r   rr   )r/   r   r   r   r   hsr5   r5   r6   rW      s   zDeepseekVL2ForCausalLM.forwardweightsc                 C   st   g d}t |  }t|}|D ]'\}}d|v r(|dd}| j||fg q|| }t|dt}||| qd S )N))qkv_projq_projq)r   k_projk)r   v_projv)gate_up_projup_projr   )r   	gate_projr   languagez	language. weight_loader)dictnamed_parameterslistreplacerr   load_weightsgetattrr   )r/   r   stacked_params_mappingparams_dictnameloaded_weightparamweights_loaderr5   r5   r6   r      s   z#DeepseekVL2ForCausalLM.load_weights	mm_inputsc                 C   s   t  }|||S rX   )r   pad_input_tokens)r/   r   r   patternr5   r5   r6   pad_input_ids   s   z$DeepseekVL2ForCausalLM.pad_input_idsitemsc              
   C   s  t jdd |D dd}| dksJ g }|D ]}|j dks#J | j|jt| j j	j
t| j jd}| |}|j\}}}	t|d  }
}d}t|jjd	 D ]}|jd|f \}}|dksm|dkro n|| }|| }||d	 |d	 |  }||d	 7 }||
||	}t| jd
|
d}t j||gd	d}|d|	}t|d|||
|d}t| jd||
d}t j||gd	d}|d|	}| jdkrt || jd d d f |g}nt || jd d d f |g}|| qZqt j|ddS )Nc                 S   s   g | ]}|j qS r5   )images_spatial_crop).0itemr5   r5   r6   
<listcomp>  s    z<DeepseekVL2ForCausalLM.get_image_feature.<locals>.<listcomp>r   )dimr8   r   )devicer7   r   z
d -> h 1 d)rP   r:   z"(th tw) (h w) d -> (th h) (tw w) d)thtwrP   rO   zd -> (th h) 1 d)r   rP   head)rh   catr   featurerc   forward_featurestypenext
parametersr_   r|   r   re   r?   r@   r'   r   rA   r   rn   r   rg   ro   r(   )r/   r   r   images_in_this_batchr   image_featureimages_embedsr2   rU   n_dimrP   rO   
tile_indexjdxnum_width_tilesnum_height_tilesnum_tiles_in_imageglobal_featureslocal_featuresnew_lines_in_globalnew_lines_in_localglobal_local_featuresr5   r5   r6   get_image_feature  s   


z(DeepseekVL2ForCausalLM.get_image_featurerX   )rY   rZ   r[   r	   r   r   r   r   Modulerb   rh   Tensorr   objectrW   r   r   strr   r   r@   r   r   r   r   r\   r5   r5   r3   r6   r]      s0    '

r]   )&typingr   r   r   r   rh   torch.nn.functionalr   
functionalrG   einopsr   r   sglang.srt.configs.deepseekvl2r	   r
   sglang.srt.layers.linearr   *sglang.srt.layers.quantization.base_configr   sglang.srt.managers.mm_utilsr   r   "sglang.srt.managers.schedule_batchr   r   ,sglang.srt.model_executor.forward_batch_infor   $sglang.srt.model_loader.weight_utilsr   sglang.srt.models.deepseekr   sglang.srt.models.deepseek_v2r   r   r   r]   
EntryClassr5   r5   r5   r6   <module>   s&      M