o
    پi                     @   s   d Z ddlmZmZmZ ddlZddlmZ ddlm	Z	m
Z
 ddlmZ ddlmZ ddlmZ G dd	 d	eZG d
d dejZeZdS )zInference-only Yi-VL model.    )IterableOptionalTupleN)CLIPVisionModelLlavaConfig)QuantizationConfig)default_weight_loader)LlavaLlamaForCausalLMc                	       sT   e Zd Z		ddedee deddf fddZd	ee	ee
jf  fd
dZ  ZS )YiVLForCausalLMN configquant_configprefixreturnc                    s4   t  j|||d t| j| _| jjdd| _d S )N)r   z./r   )super__init__YiVLMultiModalProjectorr   multi_modal_projectormm_vision_towerreplacevision_tower_subfolder)selfr   r   r   	__class__ J/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/yivl.pyr      s
   
zYiVLForCausalLM.__init__weightsc           
      C   sh  t j| jjtj| jdd| _| j	  | jj
| _| jj| _| jjj| _| jjj| _t| jdd| _t| jdd| _t| jdd | _t| j| j d | _| jd	krTn| jd
kra|  jd7  _ntd| j dddddd}t|  }t|}|D ].\}}d|v sd|v r| D ]\}}||v r|||}q|| }t|dt}	|	|| q}| j| d S )N)torch_dtype	subfoldercudamm_patch_merge_typeflatimage_aspect_ratiosquareimage_grid_pinpoints   patch	cls_patch   zUnexpected select feature: zmulti_modal_projector.linear_1zmulti_modal_projector.ln_1zmulti_modal_projector.linear_2zmulti_modal_projector.ln_2vision_tower)zmodel.mm_projector.0zmodel.mm_projector.1zmodel.mm_projector.3zmodel.mm_projector.4zmodel.vision_tower.vision_tower	projectorweight_loader) r   from_pretrainedr   _name_or_pathtorchfloat16r   tor)   evalmm_vision_select_layervision_feature_layermm_vision_select_featurevision_feature_select_strategy
image_size
patch_sizegetattrr    r"   r$   intimage_feature_len
ValueErrorselect_featuredictnamed_parameterslistitemsr   r   language_modelload_weights)
r   r   projector_weightsparams_dictnameloaded_weightweight_name
param_nameparamr+   r   r   r   rB   )   sP   





zYiVLForCausalLM.load_weights)Nr   )__name__
__module____qualname__r   r   r   strr   r   r   r.   TensorrB   __classcell__r   r   r   r   r
      s    $r
   c                       s*   e Zd Zdef fddZdd Z  ZS )r   r   c                    sd   t    t|jj|jj| _t|jj| _	t
 | _t|jj|jj| _t|jj| _d S N)r   r   nnLinearvision_confighidden_sizetext_configlinear_1	LayerNormln_1GELUactlinear_2ln_2)r   r   r   r   r   r   ]   s   

z YiVLMultiModalProjector.__init__c                 C   s6   |  |}| |}| |}| |}| |}|S rP   )rV   rX   rZ   r[   r\   )r   image_featureshidden_statesr   r   r   forwardj   s   




zYiVLMultiModalProjector.forward)rJ   rK   rL   r   r   r_   rO   r   r   r   r   r   \   s    r   )__doc__typingr   r   r   r.   torch.nnrQ   transformersr   r   *sglang.srt.layers.quantization.base_configr   $sglang.srt.model_loader.weight_utilsr   sglang.srt.models.llavar	   r
   Moduler   
EntryClassr   r   r   r   <module>   s   A