o
    پi                     @   sn   d dl Z d dlmZmZ d dlmZ d dlmZ d dlm	Z	m
Z
 e eZG dd deZG dd	 d	e	ZdS )
    N)ListUnion)ProcessorMixin)Phi4MMForCausalLM)BaseMultimodalProcessorMultimodalSpecialTokensc                   @   s   e Zd ZdddZdd ZdS )Phi4MMProcessorAdapterreturnNc                 C   s
   || _ d S )N)
_processor)selfr
    r   [/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/multimodal/processors/phi4mm.py__init__   s   
zPhi4MMProcessorAdapter.__init__c                 K   s^   | j di |}dddd}| D ]\}}||v r#|| ||< ||= qdd | D }|S )Npixel_valuesaudio_featuresaudio_feature_lens)input_image_embedsinput_audio_embedsaudio_embed_sizesc                 S   s4   i | ]\}}|d urt |dr| dkr||qS )Nnumelr   )hasattrr   ).0kvr   r   r   
<dictcomp>&   s
    z3Phi4MMProcessorAdapter.__call__.<locals>.<dictcomp>r   )r
   items)r   kwargsresultkey_mappinghf_key
sglang_keyfiltered_resultr   r   r   __call__   s   zPhi4MMProcessorAdapter.__call__)r	   N)__name__
__module____qualname__r   r"   r   r   r   r   r      s    
r   c                       s<   e Zd ZegZ fddZdeeee	f  fddZ
  ZS )Phi4MMMultimodalProcessorc                    sn   t || _t j||| jg|R i | d| _d| _d| _d| _d| _t	| j| j| j| jd
| j| _d S )Nz<|endoftext10|>z<|endoftext11|>iJ iK i>  )image_tokenimage_token_idaudio_tokenaudio_token_id)r   	processorsuperr   IMAGE_TOKENAUDIO_TOKENIM_TOKEN_IDAUDIO_TOKEN_IDAUDIO_SAMPLE_RATEr   build	mm_tokens)r   	hf_configserver_argsr
   argsr   	__class__r   r   r   1   s   
 z"Phi4MMMultimodalProcessor.__init__
image_datac           
         sh    j ||| j jd}|jd ur fdd|jD |_ | j\}}}	| | jj jjdS )N)prompt
audio_datar9   multimodal_tokensaudio_sample_ratec                    s   g | ]}| j fqS r   )r1   )r   audior   r   r   
<listcomp>W   s    
zCPhi4MMMultimodalProcessor.process_mm_data_async.<locals>.<listcomp>)	input_idsmm_itemsim_token_idr*   )load_mm_datar3   r1   audiosprocess_and_combine_mm_datatolistr(   r*   )
r   r9   r;   
input_textrequest_objr   base_outputrB   rA   _r   r?   r   process_mm_data_asyncD   s(   


z/Phi4MMMultimodalProcessor.process_mm_data_async)r#   r$   r%   r   modelsr   r   r   strbytesrL   __classcell__r   r   r7   r   r&   .   s    r&   )loggingtypingr   r   transformers.processing_utilsr   sglang.srt.models.phi4mmr   /sglang.srt.multimodal.processors.base_processorr   r   	getLoggerr#   loggerr   r&   r   r   r   r   <module>   s    
