o
    پia                     @   sH  d dl Z d dlmZ d dlZd dlZd dlmZ d dlm  mZ	 d dlm
Z
 d dlmZ d dlmZ d dlm  m  mZ d dlm  m  mZ d dlm  mZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lm Z m!Z!m"Z" d dl#m$Z$ d dl%m&Z& dZ'G dd dej(Z)G dd dej(Z*G dd dej(Z+e+gZ,dS )    N)Iterable)Tensor)BaseModelOutputWithPooling)SiglipVisionModel)JetVLMConfig)LogitsProcessorOutput)QuantizationConfig)/MultiModalityDataPaddingPatternMultimodalTokens)ModalityMultimodalDataItemMultimodalInputs)ForwardBatch)JetNemotronForCausalLMi  c                   @   s   e Zd ZdedefddZdS )JetVLMDownSample2x2BlockFixxreturnc                 C   sV   |j \}}}t|}tj|d||d}|d dkr t|d}tj|dddd}|S )Nzb (h w) d -> b h w d)hw      )r   r   r   r   r   r   z&b (h p1) (w p2) d -> b (h w) (p1 p2 d))p1p2)shapemathisqrteinops	rearrangeFpad)selfr   _seq_len	feat_sizefeatures r$   M/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/jet_vlm.pyforward   s   
z#JetVLMDownSample2x2BlockFix.forwardN)__name__
__module____qualname__r   r&   r$   r$   r$   r%   r      s    r   c                       s8   e Zd Zdeddf fddZdedefddZ  ZS )	JetVLMMultiModalProjectorconfigr   Nc              
      sR   t    tt ttd ttd |jj	t
 t|jj	|jj	| _d S )N   )super__init__nn
Sequentialr   	LayerNormMM_HIDDEN_SIZELineartext_confighidden_sizeGELUlayers)r   r+   	__class__r$   r%   r.   1   s   

z"JetVLMMultiModalProjector.__init__r   c                 C   s
   |  |S N)r7   )r   r   r$   r$   r%   r&   <   s   
z!JetVLMMultiModalProjector.forward)r'   r(   r)   r   r.   r   r&   __classcell__r$   r$   r8   r%   r*   0   s    r*   c                       s   e Zd Z		ddededB deddf fddZ		dd
ededede	de
f
ddZdee defddZdeeeef  ddfddZd
ee dedee fddZ  ZS )JetVLMForConditionalGenerationN r+   quant_configprefixr   c                    sD   t    || _t|j| _t|| _t|j	|t
d|d| _d S )Nllm)r+   r>   r?   )r-   r.   r+   r   vision_configvision_towerr*   mm_projectorr   r4   utils
add_prefixr@   )r   r+   r>   r?   r8   r$   r%   r.   A   s   


z'JetVLMForConditionalGeneration.__init__F	input_ids	positionsforward_batchget_embeddingc                 C   s:   t j||| jtj| jtj| ji||d}t|tsJ |S )N)rF   rH   language_modeldata_embedding_funcsrI   rG   )	mm_utilsgeneral_mm_embed_routiner@   r
   IMAGEget_image_featureVIDEO
isinstancer   )r   rF   rG   rH   rI   outputr$   r$   r%   r&   S   s   z&JetVLMForConditionalGeneration.forwardmm_inputc                 C   sX   t jdd |D dd}| j|dd}|jd usJ |jd }| |}t|d}|S )	Nc                 S   s   g | ]}t |jqS r$   )torchtensorfeature).0r   r$   r$   r%   
<listcomp>k   s    zDJetVLMForConditionalGeneration.get_image_feature.<locals>.<listcomp>r   )dimT)output_hidden_stateszn p d -> (n p) d)rT   catrB   hidden_statesrC   r   r   )r   rS   pixel_valuesvision_tower_outputvision_featuresr$   r$   r%   rO   j   s   

z0JetVLMForConditionalGeneration.get_image_featureweightsc                 C   sh   t |  }|D ])\}}|dr!| j|tdd  |fg q|| }t|dtj}||| qd S )Nzllm.weight_loader)	dictnamed_parameters
startswithr@   load_weightslengetattrweight_utilsdefault_weight_loader)r   ra   params_dictnameloaded_weightparamrb   r$   r$   r%   rf   {   s   
 z+JetVLMForConditionalGeneration.load_weights	mm_inputsc                 C   s   t  }|||S r:   )r	   pad_input_tokens)r   rF   ro   patternr$   r$   r%   pad_input_ids   s   z,JetVLMForConditionalGeneration.pad_input_ids)Nr=   )F)r'   r(   r)   r   r   strr.   r   r   boolr   r&   listr   rO   r   tuplerf   intr   rr   r;   r$   r$   r8   r%   r<   @   sB    
r<   )-r   collections.abcr   r   rT   torch.nnr/   torch.nn.functional
functionalr   r   transformers.modeling_outputsr   transformers.models.siglipr   sglang.srt.managers.mm_utilssrtmanagersrL   $sglang.srt.model_loader.weight_utilsmodel_loaderri   sglang.srt.utilsrD   sglang.srt.configs.jet_vlmr   "sglang.srt.layers.logits_processorr   *sglang.srt.layers.quantization.base_configr   r	   "sglang.srt.managers.schedule_batchr
   r   r   ,sglang.srt.model_executor.forward_batch_infor   sglang.srt.models.jet_nemotronr   r2   Moduler   r*   r<   
EntryClassr$   r$   r$   r%   <module>   s0    
O