o
    پiI
                     @   s   d dl mZ d dlmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZ dZG dd deZdS )    )AnyN)PretrainedConfig)ProcessorMixin)PreTrainedTokenizerBase)GenerateReqInput)JetVLMForConditionalGeneration)NVILAForConditionalGeneration)!NVILALiteForConditionalGeneration)BaseMultimodalProcessorMultimodalSpecialTokens)
ServerArgs   c                       sj   e Zd ZU eeegZeee	j
  ed< dedededdf fddZd	edeeef dB fd
dZ  ZS )NVILAMultimodalProcessormodels	hf_configserver_args
_processorreturnNc                    sR   t  j|||g|R i | |  t| jd}t|j|j|j|jd	|| _
d S )N	tokenizer)image_tokenimage_token_idvideo_tokenvideo_token_id)super__init__getattrr   r   r   r   r   r   build	mm_tokens)selfr   r   r   argskwargsr   	__class__ Z/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/multimodal/processors/nvila.pyr      s   z!NVILAMultimodalProcessor.__init__request_objc                    sv   | j || j|j|jd}t|jD ]\}}dd |D |j|< q| j|| jdtd\}	}
}|
 |	| jj	| jj
dS )N)promptmultimodal_tokens
image_data
video_datac                 S   s   g | ]}|  qS r#   )asnumpy).0xr#   r#   r$   
<listcomp>A   s    zBNVILAMultimodalProcessor.process_mm_data_async.<locals>.<listcomp>T)do_sample_frames
num_frames)	input_idsmm_itemsim_token_idr   )load_mm_datar   r(   r)   	enumeratevideosprocess_and_combine_mm_dataNUM_VIDEO_FRAMEStolistr   r   )r   r(   
audio_data
input_textr%   r    base_outputivideor1   r0   _r#   r#   r$   process_mm_data_async1   s(   z.NVILAMultimodalProcessor.process_mm_data_async)__name__
__module____qualname__r   r	   r   r   listtypennModule__annotations__r   r   r   r   r   dictstrr   r?   __classcell__r#   r#   r!   r$   r      s&   
 r   )typingr   torch.nnrE    transformers.configuration_utilsr   transformers.processing_utilsr   $transformers.tokenization_utils_baser   sglang.srt.managers.io_structr   sglang.srt.models.jet_vlmr   sglang.srt.models.nvilar   sglang.srt.models.nvila_liter	   /sglang.srt.multimodal.processors.base_processorr
   r   sglang.srt.server_argsr   r7   r   r#   r#   r#   r$   <module>   s    