o
    ٷi                     @   sl   d dl mZ d dlmZ d dlZd dlZd dlmZ er"d dl	m
Z
 d dlmZmZmZ G dd deZdS )	    )Callable)TYPE_CHECKINGN)Request)	BlockHash)AdditionalInformationPayloadOmniEngineCoreRequestPromptEmbedsPayloadc                       s   e Zd ZdZ			ddeejB dB dedB dedB f fddZ	e
deejB dB dejdB fd	d
Zedededged f dB ddfddZ  ZS )OmniRequesta*  Request class for omni models, extending the base Request.

    This class extends the base vLLM Request with support for prompt
    embeddings and additional information payloads, enabling direct
    transfer of pre-computed embeddings between stages.

    Args:
        prompt_embeds: Optional serialized prompt embeddings payload.
            Used for direct transfer of embeddings between stages.
        additional_information: Optional additional information payload
            containing tensors or lists to be passed along with the request.
    Nprompt_embedsexternal_req_idadditional_informationc                    sD   |  |}t j|d|i| t|tr|nd | _|| _|| _d S )Nr
   )_maybe_decode_prompt_embedssuper__init__
isinstancer   prompt_embeds_payloadr   r   )selfr
   r   r   argskwargsprompt_embeds_tensor	__class__ E/home/ubuntu/.local/lib/python3.10/site-packages/vllm_omni/request.pyr      s   
	
zOmniRequest.__init__returnc                 C   s@   t | trtt| j}tj| j|d}|| j}t	
|S | S )N)dtype)r   r   getattrnpr   
frombufferdatareshapeshapetorch
from_numpy)r
   r   arrr   r   r   r   0   s   

z'OmniRequest._maybe_decode_prompt_embedsrequestblock_hasherr   r   c                 C   s   | di d|j d|jd|jd|jd|jd|jd|jd|jd	|jd
|j	d|j
d|jd|jd|jd|d|jd|jd|jS )aE  Create an OmniRequest from an OmniEngineCoreRequest.

        Args:
            request: The OmniEngineCoreRequest to convert
            block_hasher: Optional function to compute block hashes for
                prefix caching

        Returns:
            OmniRequest instance created from the engine core request
        
request_idr   client_indexprompt_token_idsr
   mm_featuressampling_paramspooling_paramseos_token_idarrival_timelora_request
cache_saltprioritytrace_headersr&   r   	resumablereasoning_endedNr   )r'   r   r(   r)   r
   r*   r+   r,   r-   r.   r/   r0   r1   r2   r   r3   r4   )clsr%   r&   r   r   r   from_engine_core_request;   sJ   	
z$OmniRequest.from_engine_core_request)NNN)__name__
__module____qualname____doc__r   r"   Tensorstrr   r   staticmethodr   classmethodr   r   listr6   __classcell__r   r   r   r   r	      s4    
r	   )collections.abcr   typingr   numpyr   r"   vllm.v1.requestr   vllm.v1.core.kv_cache_utilsr   vllm_omni.enginer   r   r   r	   r   r   r   r   <module>   s    