o
    پi&                     @   s   d dl Z d dlmZmZmZmZ d dlZd dlm	Z
 d dlm  m  mZ d dlmZmZ d dlmZmZmZmZ d dlmZ d dlmZ d dlmZmZ d d	lm Z  d d
l!m"Z"m#Z#m$Z$ d dl%m&Z& G dd de Z'G dd de Z(dS )    N)DictListOptionalUnion)PROCESSOR_MAPPING_NAMES)ModalityMultimodalDataItem)LlavaForConditionalGenerationLlavaLlamaForCausalLMLlavaMistralForCausalLMLlavaQwenForCausalLM)LlavaVidForCausalLM) Mistral3ForConditionalGeneration)expand2squareprocess_anyres_image)BaseMultimodalProcessor)	ImageData
load_imagelogger)get_exception_tracebackc                	       s   e Zd ZeeeegZ fddZe				dde
eeef dee dee fddZde
eeef d	ed
efddZdee defddZdee
eeef  fddZ  ZS )LlavaImageProcessorc                    s"   t  j|||g|R i | d S N)super__init__)self	hf_configserver_args
_processorargskwargs	__class__ Z/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/multimodal/processors/llava.pyr   !   s   "zLlavaImageProcessor.__init__N
image_dataimage_aspect_ratioimage_grid_pinpointsc                 C   sF  |j }zt| tr| jn| }t|\}}|d urBt|}||d }	tt|	D ]}
|	|
 t	j
|	|
< q(t	j|	dd}	|	||fW S t|}|dkrct|tdd |jD }||dd d }	n|dkso|d urvd	|v rvt|||}	n||d d }	t|	t	jr|	t	j
}	|	||jfW S  ty   td
t   Y d S w )Npixel_valuesr   axispadc                 s   s    | ]	}t |d  V  qdS )   N)int).0xr"   r"   r#   	<genexpr>?   s    zALlavaImageProcessor._process_single_image_task.<locals>.<genexpr>RGBanyres
anyres_maxzException in TokenizerManager:
)image_processor
isinstancer   urlr   hashrangelenastypenpfloat16stackr   tuple
image_meanconvertr   ndarraysize	Exceptionr   errorr   )r$   r%   r&   	processorr3   r5   image
image_size
image_hashr'   _r"   r"   r#   _process_single_image_task$   sF   z.LlavaImageProcessor._process_single_image_taskaspect_ratiogrid_pinpointsc                    sH   | j d urt }|| j tj|||| jI d H S | |||| jjS r   )cpu_executorasyncioget_running_looprun_in_executorr   rI   r   r3   )r   r$   rJ   rK   loopr"   r"   r#   _process_single_imageU   s"   

	z)LlavaImageProcessor._process_single_imagereturnc                 C   s   g }|D ]E}d|vr<d|v r7|d }t |jdkr#|jd |jd fn	|jd |jd f\}}||fg|d< ndg|d< |t|d tj|d	 qd
|iS )Nimage_sizesr'               )P  rX   feature)rY   modalitymodel_specific_datamm_items)r8   shapeappendr   r   IMAGE)r   r$   r\   itempvhwr"   r"   r#   _process_precomputed_image_datam   s&   
z3LlavaImageProcessor._process_precomputed_image_datac                    s  t |trt|dkrt |d tr| |S |jpdg}t| jdd }t| jdr3d|v r3| jj	nd }t |trt|dkrd|v sHd|v rd}g g g }	}
}g }|D ]}|
| ||| qVtj| I d H }|D ]\}}}|	
| |

| |
| qmt |	d tjrtj|	dd	}	n| |d ||I d H \}	}}|g}ntd
| tj}t |jtr|jd dkrtj}n
|jd dkrtj}dt|	d|i|dgiS )Nr   rE   r%   r&   r1   zmulti-imagesvideor*   r(   zInvalid image data: r\   rS   )rY   r[   rZ   )r4   listr8   dictrd   
modalitiesgetattrr   hasattrr&   r^   rQ   rM   gatherr:   r@   r<   
ValueErrorr   r_   MULTI_IMAGESVIDEOr   )r   r$   
input_textrequest_objr   r   rh   rJ   rK   r'   data_hashesrS   resimg_datapixel_vimage_himage_srG   rF   rZ   r"   r"   r#   process_mm_data_async   sl   




z)LlavaImageProcessor.process_mm_data_async)NNN)__name__
__module____qualname__r
   r   r   r   modelsr   staticmethodr   strbytesr   r   rI   rQ   r   r   rd   rw   __classcell__r"   r"   r    r#   r      s:    0
r   c                   @   s6   e Zd ZdZeegZdefddZdd Z	dd Z
d	S )
LlavaMultimodalProcessorzr
    This is a wrapper class used to identify the multimodal processor for Llava architectures' vision model.
    
model_typec                    sV   |dkrt S t|  r#tj }tt fdd|}|r#|d S td| d)Nclip_vision_modelc                    s
   | j  kS r   )rx   )phf_namer"   r#   <lambda>   s   
 zALlavaMultimodalProcessor._get_sgl_processor_cls.<locals>.<lambda>r   zTCannot find corresponding multimodal processor registered in sglang for model type ``)	r   HF_MAPPING_NAMESgetsgl_mm_processor_utilsPROCESSOR_MAPPINGvaluesrf   filterrl   )r   r   sgl_mm_processor_setsgl_processor_clsr"   r   r#   _get_sgl_processor_cls   s   

z/LlavaMultimodalProcessor._get_sgl_processor_clsc                 O   sx   t |dsJ t |dsJ |j| _|j| _|| _t| jd }r4| ||||g|R i || _d S td| d)Nvision_configtext_configr   z@Required `vision_config.model_type` is not found in hf_config: `r   )rj   r   r   r   ri   r   innerrl   )r   r   r   r   r   r   vision_typer"   r"   r#   r      s    
z!LlavaMultimodalProcessor.__init__c                    s   | j j|i |I d H S r   )r   rw   )r   r   r   r"   r"   r#   rw      s   z.LlavaMultimodalProcessor.process_mm_data_asyncN)rx   ry   rz   __doc__r	   r   r{   r}   r   r   rw   r"   r"   r"   r#   r      s    r   ))rM   typingr   r   r   r   numpyr:   (transformers.models.auto.processing_autor   r   (sglang.srt.managers.multimodal_processorsrtmanagersmultimodal_processorr   "sglang.srt.managers.schedule_batchr   r   sglang.srt.models.llavar	   r
   r   r   sglang.srt.models.llavavidr   sglang.srt.models.mistralr   sglang.srt.multimodal.mm_utilsr   r   /sglang.srt.multimodal.processors.base_processorr   sglang.srt.utilsr   r   r   sglang.utilsr   r   r   r"   r"   r"   r#   <module>   s      <