o
    پi
                     @   sP   d dl mZmZmZmZ d dlmZ d dlm	Z	 d dl
mZ G dd deZdS )    )DictListOptionalUnion)BaseMultimodalProcessor)Gemma3nForConditionalGeneration)MultimodalSpecialTokensc                
       sj   e Zd ZdZegZ fddZ				ddeee	e
eef   deee	e
eef   de
fd	d
Z  ZS )Gemma3nSGLangProcessorzCMultimodal processor for Gemma3n supporting image and audio inputs.c                    s^   t  j|||g|R i | |j| _|j| _|j| _|j| _	t
d|jd|jd|| _d S )Nz<image_soft_token>z<audio_soft_token>)image_tokenimage_token_idaudio_tokenaudio_token_id)super__init__boi_token_idIM_START_TOKEN_IDeoi_token_idIM_END_TOKEN_IDboa_token_idAUDIO_START_TOKEN_IDeoa_token_idAUDIO_END_TOKEN_IDr   r   r   build	mm_tokens)self	hf_configserver_args
_processorargskwargs	__class__ \/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/multimodal/processors/gemma3n.pyr      s   zGemma3nSGLangProcessor.__init__N 
image_data
audio_data
input_textc                    sD   | j |||| jd}| || j\}}	}
|	 || jj| jjdS )z3Process multimodal data including images and audio.)promptr%   r&   multimodal_tokens)	input_idsmm_itemsim_token_idr   )load_mm_datar   process_and_combine_mm_datatolistr   r   )r   r%   r&   r'   request_objr   r   base_outputr+   r*   _r"   r"   r#   process_mm_data_async,   s   

z,Gemma3nSGLangProcessor.process_mm_data_async)NNr$   N)__name__
__module____qualname____doc__r   modelsr   r   r   r   strbytesr   r3   __classcell__r"   r"   r    r#   r	      s    r	   N)typingr   r   r   r   (sglang.srt.managers.multimodal_processorr   SGLangBaseProcessorsglang.srt.models.gemma3n_mmr   /sglang.srt.multimodal.processors.base_processorr   r	   r"   r"   r"   r#   <module>   s
   