o
    -iJ                     @   s   d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ dd	lmZmZmZ dd
lmZ ddlmZ G dd deZG dd dee ZG dd dejZejeeedG dd deZdS )    )MappingN)GELUActivation)
VllmConfig)BaseDummyOptions)MULTIMODAL_REGISTRY)MultiModalDataDict   )LlavaDummyInputsBuilderLlavaNextMultiModalProcessorLlavaNextProcessingInfo)&LlavaOnevisionForConditionalGeneration)WeightsMapperc                   @   sL   e Zd Zdd ZdefddZdededed	ed
edeeef fddZdS )BeeProcessingInfoc                 C   s
   | j  S N)ctxget_hf_config)self r   [/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/model_executor/models/bee.pyr      s   
zBeeProcessingInfo.get_hf_configkwargsc                 K   s   | j jdi |S )Nr   )r   get_hf_processor)r   r   r   r   r   r      s   z"BeeProcessingInfo.get_hf_processororiginal_heightoriginal_widthnpatchesnum_patch_heightnum_patch_widthreturnc                C   s  ddl }|| }|| }|| }	|| }
|	|
kr0tt|||  d}|| d }|d|  }ntt|||  d}|| d }|d|  }|| }|}|  }t|dd}t|dd}||| ||d   }|d	krt|| }t|| }|| }|}||fS )
zAOverride to use correct max_num_patches from vision_aspect_ratio.r   N      vision_aspect_ratioanyres_max_9anyres_max_ g?)mathintroundr   getattrreplacesqrt)r   r   r   r   r   r   r#   current_heightcurrent_widthaspect_ratiocurrent_aspect_ratio
new_heightpadding	new_widthunpadded_featuresnewline_features	hf_configr   max_num_patchesratioheight_factorwidth_factorr   r   r   _get_num_unpadded_features   s<   
z,BeeProcessingInfo._get_num_unpadded_featuresN)	__name__
__module____qualname__r   objectr   r$   tupler7   r   r   r   r   r      s     
r   c                	   @   sX   e Zd Zdeeef defddZ	d
dedeeef deeef dB defdd	Z	dS )BeeDummyInputsBuilder	mm_countsr   c                 C   s   | dd}d}|| S )Nimager   z<image>)get)r   r>   
num_imagesimage_tokenr   r   r   get_dummy_textS   s   z$BeeDummyInputsBuilder.get_dummy_textNseq_len
mm_optionsc                 C   sB   | dd}| j \}}|r| dnd }d| j||||diS )Nr?   r   )widthheightrA   	overrides)r@   info!get_image_size_with_most_features_get_dummy_images)r   rD   r>   rE   rA   target_widthtarget_heightimage_overridesr   r   r   get_dummy_mm_dataY   s   z'BeeDummyInputsBuilder.get_dummy_mm_datar   )
r8   r9   r:   r   strr$   rC   r   r   rO   r   r   r   r   r=   R   s    

r=   c                       s2   e Zd Z fddZdejdejfddZ  ZS )BeeMultiModalProjectorc                    sf   t    tj|jjdd| _tj|jj|jjd dd| _	t
 | _tj|jjd |jjdd| _d S )Ngư>)eps   T)bias)super__init__nn	LayerNormvision_confighidden_sizepre_normLineartext_configlinear_1r   actlinear_2)r   config	__class__r   r   rV   p   s   


zBeeMultiModalProjector.__init__image_featurer   c                 C   s,   |  |}| |}| |}| |}|S r   )r[   r^   r_   r`   )r   rd   hidden_statesr   r   r   forward   s
   



zBeeMultiModalProjector.forward)r8   r9   r:   rV   torchTensorrf   __classcell__r   r   rb   r   rQ   o   s    rQ   )rI   dummy_inputsc                       sF   e Zd ZedddddddZdd	d
ededdf fddZ  ZS )BeeForConditionalGenerationzlanguage_model.model.zvision_tower.zmulti_modal_projector.image_newlinezlanguage_model.lm_head.)zmodel.language_model.zmodel.vision_tower.zmodel.multi_modal_projector.zmodel.image_newlinezlm_head.)orig_to_new_prefixr"   )prefixvllm_configrn   r   Nc                   s&   t  j||d |jj}t|| _d S )N)ro   rn   )rU   rV   model_configr2   rQ   multi_modal_projector)r   ro   rn   ra   rb   r   r   rV      s   z$BeeForConditionalGeneration.__init__)	r8   r9   r:   r   hf_to_vllm_mapperr   rP   rV   ri   r   r   rb   r   rk      s    (rk   )collections.abcr   rg   torch.nnrW   transformers.activationsr   vllm.configr   vllm.config.multimodalr   vllm.multimodalr   vllm.multimodal.inputsr   
llava_nextr	   r
   r   llava_onevisionr   utilsr   r   r=   ModulerQ   register_processorrk   r   r   r   r   <module>   s(   :