o
    
۾i;                     @   s   d dl mZ d dlmZmZ d dlmZ d dlmZ d dl	m
Z
mZ d dlmZ d dlmZ d dlmZ G d	d
 d
eZG dd deZG dd deZdS )    )Any)	BaseModelField)ModelConfig)ChatCompletionLogProbs)SamplingParamsStreamOptions)LogprobTokenizeParamsrandom_uuidc                   @   s   e Zd ZU edd ddZeed< ee ed< 	 dZ	edB ed< 	 e
ed	< 	 dZedB ed
< dZedB ed< dZedB ed< edddZedB ed< edddZeed< edddZeeef dB ed< dedefddZdS )GenerateRequestc                   C      t   S Nr    r   r   Z/home/ubuntu/.local/lib/python3.10/site-packages/vllm/entrypoints/serve/disagg/protocol.py<lambda>       zGenerateRequest.<lambda>The request_id related to this request. If the caller does not set it, a random_uuid will be generated. This id is used through out the inference process and return in response.default_factorydescription
request_id	token_idsNfeaturessampling_paramsmodelFstreamstream_optionsa/  If specified, the prefix cache will be salted with the provided string to prevent an attacker to guess prompts in multi-user environments. The salt should be random, protected from access by 3rd parties, and long enough to be unpredictable (e.g., 43 characters base64-encoded, corresponding to 256 bit).defaultr   
cache_saltr   zThe priority of the request (lower means earlier handling; default: 0). Any priority other than 0 will raise an error if the served model does not use priority scheduling.priority5KVTransfer parameters used for disaggregated serving.kv_transfer_paramsmodel_configreturnc                 C   s   t d ddS )Nr   )max_total_tokensmax_output_tokensr
   )selfr&   r   r   r   build_tok_paramsC   s   z GenerateRequest.build_tok_params)__name__
__module____qualname__r   r   str__annotations__listintr   r   r   r   boolr   r   r"   r#   r%   dictr   r   r   r+   r   r   r   r   r      s6   
 r   c                   @   sJ   e Zd ZU eed< dZedB ed< dZedB ed< dZ	e
e dB ed< dS )GenerateResponseChoiceindexNlogprobsstopfinish_reasonr   )r,   r-   r.   r2   r0   r7   r   r9   r/   r   r1   r   r   r   r   r5   J   s
   
 r5   c                   @   sr   e Zd ZU edd ddZeed< ee ed< dZ	ee
eef dB  dB ed< edd	d
Ze
eef dB ed< dS )GenerateResponsec                   C   r   r   r   r   r   r   r   r   T   r   zGenerateResponse.<lambda>r   r   r   choicesNprompt_logprobsr$   r    r%   )r,   r-   r.   r   r   r/   r0   r1   r5   r<   r4   r2   r	   r%   r   r   r   r   r   r:   R   s   
  r:   N)typingr   pydanticr   r   vllm.configr   0vllm.entrypoints.openai.chat_completion.protocolr   'vllm.entrypoints.openai.engine.protocolr   r   vllm.logprobsr	   vllm.renderersr   
vllm.utilsr   r   r5   r:   r   r   r   r   <module>   s   7