o
    iB                     @   s   d dl mZmZ d dlmZmZ d dlmZmZ d dl	m
Z
 d dlmZmZ d dlmZ d dlmZmZmZ G dd	 d	e
ZG d
d de
ZG dd de
ZG dd de
ZG dd deZG dd de
ZdS )    )	AnnotatedAny)Fieldmodel_validator)ChatCompletionMessageParamChatTemplateContentFormatOption)OpenAIBaseModel)
ChatParamsmerge_kwargs)random_uuid)
EmbedDTypeEncodingFormat
Endiannessc                   @   s   e Zd ZU dZedB ed< dZedB ed< dZee	e
ddf dB ed< e
eddZeed	< e
d
ddZe	ed< e
dddZeeef dB ed< e
dddZedB ed< dS )PoolingBasicRequestMixinNmodeluser)getruncate_prompt_tokenszThe request_id related to this request. If the caller does not set it, a random_uuid will be generated. This id is used through out the inference process and return in response.)default_factorydescription
request_idr   zThe priority of the request (lower means earlier handling; default: 0). Any priority other than 0 will raise an error if the served model does not use priority scheduling.defaultr   priorityz.Additional kwargs to pass to the HF processor.mm_processor_kwargsa/  If specified, the prefix cache will be salted with the provided string to prevent an attacker to guess prompts in multi-user environments. The salt should be random, protected from access by 3rd parties, and long enough to be unpredictable (e.g., 43 characters base64-encoded, corresponding to 256 bit).
cache_salt)__name__
__module____qualname__r   str__annotations__r   r   r   intr   r   r   r   r   dictr   r    r$   r$   \/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/entrypoints/pooling/base/protocol.pyr      s(   
 r   c                   @   sF   e Zd ZU ee eee  B eB ee B ed< edddZe	ed< dS )CompletionRequestMixininputTzMIf true (the default), special tokens (e.g. BOS) will be added to the prompt.r   add_special_tokensN)
r   r   r   listr"   r    r!   r   r(   boolr$   r$   r$   r%   r&   =   s   
 $r&   c                   @   s   e Zd ZU ee ed< edddZeed< edddZ	eed< edddZ
eed	< ed
ddZed
B ed< ed
ddZeeef d
B ed< eddedd Zded
B dedefddZd
S )ChatRequestMixinmessagesFzIf true, the generation prompt will be added to the chat template. This is a parameter used by chat template in tokenizer config of the model.r   add_generation_prompta:  If this is set, the chat will be formatted so that the final message in the chat is open-ended, without any EOS tokens. The model will continue this message rather than starting a new one. This allows you to "prefill" part of the model's response for it. Cannot be used at the same time as `add_generation_prompt`.continue_final_messagezIf true, special tokens (e.g. BOS) will be added to the prompt on top of what is added by the chat template. For most models, the chat template takes care of adding the special tokens so this should be set to false (as is the default).r(   NzA Jinja template to use for this conversion. As of transformers v4.44, default chat template is no longer allowed, so you must provide a chat template if the tokenizer does not define one.chat_templatezbAdditional keyword args to pass to the template renderer. Will be accessible by the chat template.chat_template_kwargsbefore)modec                 C   s    | dr| drtd|S )Nr.   r-   zMCannot set both `continue_final_message` and `add_generation_prompt` to True.)get
ValueError)clsdatar$   r$   r%   check_generation_prompt   s
   z(ChatRequestMixin.check_generation_promptdefault_templatedefault_template_content_formatreturnc              	   C   s(   t | jp||t| jt| j| jddS )N)r-   r.   )r/   chat_template_content_formatr0   )r	   r/   r
   r0   r#   r-   r.   )selfr8   r9   r$   r$   r%   build_chat_params   s   z"ChatRequestMixin.build_chat_params)r   r   r   r)   r   r!   r   r-   r*   r.   r(   r/   r    r0   r#   r   r   classmethodr7   r   r	   r=   r$   r$   r$   r%   r+   M   s@   
 

		r+   c                   @   sB   e Zd ZU dZeed< edddZeed< edddZ	e
ed	< d
S )EncodingRequestMixinfloatencoding_formatfloat32zWhat dtype to use for encoding. Default to using float32 for base64 encoding to match the OpenAI python client behavior. This parameter will affect base64 and binary_response.r   embed_dtypenativezWhat endianness to use for encoding. Default to using native for base64 encoding to match the OpenAI python client behavior.This parameter will affect base64 and binary_response.
endiannessN)r   r   r   rA   r   r!   r   rC   r   rE   r   r$   r$   r$   r%   r?      s   
 r?   c                   @   sN   e Zd ZU dZedB ed< edddZedB ed< edddZ	edB ed< dS )EmbedRequestMixinN
dimensionsrWhether to use activation for the pooler outputs. `None` uses the pooler's default, which is `True` in most cases.r   use_activationz0Deprecated; please pass `use_activation` instead	normalize)
r   r   r   rG   r"   r!   r   rI   r*   rJ   r$   r$   r$   r%   rF      s   
 rF   c                   @   s&   e Zd ZU edddZedB ed< dS )ClassifyRequestMixinNrH   r   rI   )r   r   r   r   rI   r*   r!   r$   r$   r$   r%   rK      s
   
 rK   N)typingr   r   pydanticr   r   vllm.entrypoints.chat_utilsr   r   'vllm.entrypoints.openai.engine.protocolr   vllm.renderersr	   r
   
vllm.utilsr   vllm.utils.serial_utilsr   r   r   r   r&   r+   r?   rF   rK   r$   r$   r$   r%   <module>   s   *P