o
    ۷i                     @   s   d dl mZ d dlmZ d dlmZmZ d dlmZ d dl	m
Z
 d dlmZmZ d dlmZ d dlmZmZmZmZmZ e
eZG d	d
 d
eZdS )    )Any)assert_never)EmbedsInputsSingletonInputs)InputPreprocessor)init_logger)MultiModalInputsMultiModalUUIDDict)SingletonDictPrompt)OmniEmbedsPromptOmniTextPromptOmniTokenInputsOmniTokensPrompttoken_inputs_omnic                       s   e Zd ZdZ	ddddedeeef dB dedB de	e
B fdd	Z	ddddedeeef dB dedB de	e
B fd
dZdedef fddZ	ddddedeeef dB dedB defddZ  ZS )OmniInputPreprocessora  Input preprocessor for omni models.

    Extends the base InputPreprocessor to handle omni-specific input
    types including prompt embeddings and additional information payloads.
    Supports processing tokens, embeddings, text, and multimodal inputs.
    Nmm_uuidsparsed_contenttokenization_kwargsr   returnc                C   s   |d }| dp
i }| d }r7| j|||||d}| d}|dur)||d< | d}	|	dur6|	|d< n!|rD| j|i |||d}n| j||d}
t|
| d| dd	}| d
 }rc||d
< |S )a5  Process text prompts with support for mm_processor_kwargs.

        Extends base class to support mm_processor_kwargs without multi_modal_data.
        This is needed for models like GLM-Image where text-to-image generation
        requires processor kwargs (target_h, target_w) to format the prompt.
        promptmm_processor_kwargsmulti_modal_datar   r   prompt_embedsNadditional_information)r   )r   r   
cache_salt)get_process_multimodal_tokenize_promptr   )selfr   r   r   prompt_textr   r   inputsr   r   prompt_token_idsr    r$   Q/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm_omni/inputs/preprocess.py_process_text   sJ   

z#OmniInputPreprocessor._process_textc          
      C   s   |  |d |}|d}|d}|d}|r9| j|||dp#i ||d}|d ur0||d< |d ur8||d< nt|||d}|d }	rK|	|d< |S )	Nr#   r   r   r   r   r   )r#   r   r   r   )_truncate_inputsr   r   r   )
r    r   r   r   r#   r   r   r   r"   r   r$   r$   r%   _process_tokensW   s2   


z%OmniInputPreprocessor._process_tokensc                    s*   t  |}|d}|dur||d< |S )zProcess embeddings prompt with omni-specific extensions.

        Extends base _process_embeds to handle additional_information payload
        for direct transfer between pipeline stages.
        r   N)super_process_embedsr   )r    r   r"   r   	__class__r$   r%   r*   }   s
   

z%OmniInputPreprocessor._process_embedsr   c                C   sL   d|v r| j ||dS d|v r| |S d|v r | j|||dS t| dS )z
        Extract the singleton inputs from a prompt.

        Arguments:

        * prompt: single encoder or decoder input prompt

        Returns:

        * [`SingletonInputs`][vllm.inputs.data.SingletonInputs] instance
        r#   r   r   r   r   N)r(   r*   r&   r   )r    r   r   r   r$   r$   r%   _prompt_to_llm_inputs   s   
z+OmniInputPreprocessor._prompt_to_llm_inputs)N)__name__
__module____qualname____doc__r   dictstrr   r	   r   r   r&   r   r(   r   r   r*   r
   r   r-   __classcell__r$   r$   r+   r%   r      sV    

=
&r   N)typingr   typing_extensionsr   vllm.inputs.datar   r   vllm.inputs.preprocessr   vllm.loggerr   vllm.multimodal.inputsr   r	   vllm.renderers.inputsr
   vllm_omni.inputs.datar   r   r   r   r   r.   loggerr   r$   r$   r$   r%   <module>   s    