o
    i                     @   s   d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ dd	lmZmZmZ dd
lmZ ddlmZ G dd deZG dd dee ZG dd dejZejeeedG dd deZdS )    )MappingN)GELUActivation)
VllmConfig)BaseDummyOptions)MULTIMODAL_REGISTRY)MultiModalDataDict   )LlavaDummyInputsBuilderLlavaNextMultiModalProcessorLlavaNextProcessingInfo)&LlavaOnevisionForConditionalGeneration)WeightsMapperc                   @   s"   e Zd Zdd ZdefddZdS )RVLProcessingInfoc                 C   s
   | j  S N)ctxget_hf_config)self r   T/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/model_executor/models/rvl.pyr      s   
zRVLProcessingInfo.get_hf_configkwargsc                 K   s   | j jdi |S )Nr   )r   get_hf_processor)r   r   r   r   r   r      s   z"RVLProcessingInfo.get_hf_processorN)__name__
__module____qualname__r   objectr   r   r   r   r   r      s    r   c                	   @   sX   e Zd Zdeeef defddZ	d
dedeeef deeef dB defdd	Z	dS )RVLDummyInputsBuilder	mm_countsreturnc                 C   s   | dd}d}|| S )Nimager   z<image>)get)r   r   
num_imagesimage_tokenr   r   r   get_dummy_text!   s   z$RVLDummyInputsBuilder.get_dummy_textNseq_len
mm_optionsc                 C   sB   | dd}| j \}}|r| dnd }d| j||||diS )Nr   r   )widthheightr    	overrides)r   info!get_image_size_with_most_features_get_dummy_images)r   r#   r   r$   r    target_widthtarget_heightimage_overridesr   r   r   get_dummy_mm_data'   s   z'RVLDummyInputsBuilder.get_dummy_mm_datar   )
r   r   r   r   strintr"   r   r   r.   r   r   r   r   r       s    

r   c                       s2   e Zd Z fddZdejdejfddZ  ZS )RVLMultiModalProjectorc                    s^   t    tj|jjdd| _tj|jj|jjdd| _	t
 | _tj|jj|jjdd| _d S )Ngư>)epsT)bias)super__init__nn	LayerNormvision_confighidden_sizepre_normLineartext_configlinear_1r   actlinear_2)r   config	__class__r   r   r5   >   s   
zRVLMultiModalProjector.__init__image_featurer   c                 C   s,   |  |}| |}| |}| |}|S r   )r:   r=   r>   r?   )r   rC   hidden_statesr   r   r   forwardM   s
   



zRVLMultiModalProjector.forward)r   r   r   r5   torchTensorrE   __classcell__r   r   rA   r   r1   =   s    r1   )r(   dummy_inputsc                       sF   e Zd ZedddddddZdd	d
ededdf fddZ  ZS )RForConditionalGenerationzlanguage_model.model.zvision_tower.zmulti_modal_projector.image_newlinezlanguage_model.lm_head.)zmodel.language_model.zmodel.vision_tower.zmodel.multi_modal_projector.zmodel.image_newlinezlm_head.)orig_to_new_prefix )prefixvllm_configrN   r   Nc                   s&   t  j||d |jj}t|| _d S )N)rO   rN   )r4   r5   model_config	hf_configr1   multi_modal_projector)r   rO   rN   r@   rA   r   r   r5   h   s   z"RForConditionalGeneration.__init__)	r   r   r   r   hf_to_vllm_mapperr   r/   r5   rH   r   r   rA   r   rJ   V   s    (rJ   )collections.abcr   rF   torch.nnr6   transformers.activationsr   vllm.configr   vllm.config.multimodalr   vllm.multimodalr   vllm.multimodal.inputsr   
llava_nextr	   r
   r   llava_onevisionr   utilsr   r   r   Moduler1   register_processorrJ   r   r   r   r   <module>   s(   