o
    پi{                     @   s   d dl Z d dlmZ d dlmZmZ d dlmZ ddlm	Z	 ddl
mZmZmZmZ de jdB d	eeeef  d
e jdB deeeef  dee f
ddZG dd dZdS )    N)PretrainedConfig)ModalityMultimodalDataItem)logger   )tokens_per_frame)EVS	EVSConfigEVSDataItemVideoEVSDataItemimageimage_offsetsvideovideo_offsetsinput_ids_listc                 C   sL   g }| d urt tj| |d}|| |d ur$t tj||d}|| |S )N)modalityfeatureoffsets)r   r   IMAGEappendVIDEO)r   r   r   r   r   itemsitem r   [/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/multimodal/evs/evs_processor.py_non_evs_data_items   s   

r   c                   @   sP   e Zd ZdZdedeee ee f fddZde	e
 de
de
d	e
fd
dZdS )EVSProcessorz
    This processor handles prompt construction with the correct number of
    placeholder tokens per frame. When EVS is active, it allocates fewer
    placeholders based on the pruning rate. When inactive, it uses the full
    token count.
    	hf_configconfig_to_evs_modelc              
   C   s   t |dksJ tdd | D sJ d | _|jj}||j}|d u r3td| d|  d S |	|}td| d|j
dkrEd	nd
 d|j d|  |j
dkr[|| _d S d S )Nr   c                 s   s    | ]}t |tV  qd S )N)
issubclassr   ).0modelr   r   r   	<genexpr>>   s    z(EVSProcessor.__init__.<locals>.<genexpr>z[EVS] no model matches z in z[EVS]  g        enableddisabledz for model=z; model_config=)lenallvalues
evs_config	__class____name__getr   infocreate_evs_configvideo_pruning_rate)selfr   r   config_name	evs_modelr)   r   r   r   __init__8   s"   
*

zEVSProcessor.__init__frames_per_video
num_imagesrowscolsc                   s     j du rfddD }t|fS dtt dtjdB dttttf  dtjdB dttttf  d	tt f fd
d}fddD }||fS )z\helper function to create data items for models with static image and video tokens per frameNc                    s   g | ]} g| qS r   r   r    
num_frames)frame_num_tokensr   r   
<listcomp>X   s    z7EVSProcessor.static_size_data_items.<locals>.<listcomp>r   r   r   r   r   returnc           	         sv   g }|d urd fg }t tj|||d}|| |d ur9 fddD }ttj|||| d}|| |S )Nr   )r   r   r   	thw_gridsc                    s   g | ]}| fqS r   r   r8   )r7   r6   r   r   r;   n   s    
zVEVSProcessor.static_size_data_items.<locals>.create_evs_data_items.<locals>.<listcomp>)r   r   r   r=   pre_chunked_input_ids)r
   r   r   r   r   r   )	r   r   r   r   r   r   image_thw_gridsr   video_thw_grids)r7   r4   r5   r6   r   r   create_evs_data_items[   s.   

zBEVSProcessor.static_size_data_items.<locals>.create_evs_data_itemsc                    s   g | ]}t jj| d qS ))qr9   r:   )r   r)   r/   r8   )r:   r0   r   r   r;   {   s    )r)   r   listinttorchTensortupler   )r0   r4   r5   r6   r7   tpfrA   r   )r7   r:   r4   r5   r6   r0   r   static_size_data_itemsP   s*   
 	z#EVSProcessor.static_size_data_itemsN)r+   
__module____qualname____doc__r   dicttyper   r3   rC   rD   rI   r   r   r   r   r   0   s     
r   )rE   transformersr   "sglang.srt.managers.schedule_batchr   r   sglang.utilsr   evs_corer   
evs_moduler   r	   r
   r   rF   rC   rG   rD   r   r   r   r   r   r   <module>   s$   
