o
    ijG                     @   sH  d dl Z d dlZd dlmZ d dlmZmZmZ d dlZd dl	m
Z
mZ d dlmZ d dlmZmZmZmZmZmZmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlm Z m!Z!m"Z"m#Z# d dl$m%Z% ee&Z'e(ej)Z*G dd deZ+G dd deZ,G dd deZ-G dd deZ.G dd deZ/G dd deZ0dS )    N)replace)	AnnotatedAnyLiteral)Fieldmodel_validator)ModelConfig)AnyResponseFormat!LegacyStructuralTagResponseFormatLogitsProcessorsOpenAIBaseModelStreamOptionsStructuralTagResponseFormat	UsageInfoget_logits_processors)VLLMValidationError)init_logger)Logprob)TokenizeParams)BeamSearchParamsRequestOutputKindSamplingParamsStructuredOutputsParamsrandom_uuidc                	   @   s:  e Zd ZU dZedB ed< dZee eee  B eB ee B dB ed< dZ	e
dB ed< dZedB ed< dZeeef dB ed< dZedB ed	< d
ZedB ed< dZeed< dZedB ed< edejejdZedB ed< g Zeee B dB ed< dZe
dB ed< dZedB ed< dZedB ed< dZedB ed< dZedB ed< dZedB ed< dZ e
ed< dZ!edB ed< dZ"edB ed< dZ#edB ed< dZ$eed< g Z%ee dB ed< dZ&e
ed< dZ'e
ed < d!Z(eed"< d#Z)e
ed$< d#Z*e
ed%< dZ+e,eed&ejdf dB ed'< dZ-ee dB ed(< dZ.edB ed)< dZ/e0ee0 B dB ed*< ed#d+d,Z1e
ed-< edd.d,Z2e3dB ed/< edd0d,Z4e5dB ed1< ed!d2d,Z6eed3< ee7d4d5Z8eed6< edd7d,Z9e:dB ed8< edd9d,Z;e
dB ed:< edd;d,Z<e
dB ed<< edd=d,Z=edB ed>< edd?d,Z>eee?f dB ed@< eddAd,Z@eeeeB eB f dB edB< dCeAdDeBfdEdFZCdddd!ddGZDeedH< 	d[dedIedB dDeEfdJdKZF	d[dedLedB dIedB dDeGfdMdNZHeIdOdPeJdQdR ZKeIdOdPeJdSdT ZLeIdOdPeJdUdV ZMeIdOdPeJdWdX ZNeIdOdPeJdYdZ ZOdS )\CompletionRequestNmodelpromptFechog        frequency_penalty
logit_biaslogprobs   
max_tokens   npresence_penalty)geleseedstopstreamstream_optionssuffixtemperaturetop_puseruse_beam_searchtop_kmin_prepetition_penalty      ?length_penaltystop_token_idsinclude_stop_str_in_output
ignore_eosr   
min_tokensTskip_special_tokensspaces_between_special_tokenstruncate_prompt_tokensallowed_token_idsprompt_logprobsprompt_embedszMIf true (the default), special tokens (e.g. BOS) will be added to the prompt.defaultdescriptionadd_special_tokenszSimilar to chat completion, this parameter specifies the format of output. Only {'type': 'json_object'}, {'type': 'json_schema'}, {'type': 'structural_tag'}, or {'type': 'text' } is supported.response_formatz(Additional kwargs for structured outputsstructured_outputszThe priority of the request (lower means earlier handling; default: 0). Any priority other than 0 will raise an error if the served model does not use priority scheduling.priorityzThe request_id related to this request. If the caller does not set it, a random_uuid will be generated. This id is used through out the inference process and return in response.)default_factoryrD   
request_ida  A list of either qualified names of logits processors, or constructor objects, to apply when sampling. A constructor is a JSON object with a required 'qualname' field specifying the qualified name of the processor class/factory, and optional 'args' and 'kwargs' fields containing positional and keyword arguments. For example: {'qualname': 'my_module.MyLogitsProcessor', 'args': [1, 2], 'kwargs': {'param': 'value'}}.logits_processorszIf specified with 'logprobs', tokens are represented  as strings of the form 'token_id:{token_id}' so that tokens that are not JSON-encodable can be identified.return_tokens_as_token_idsa(  If specified, the result will include token IDs alongside the generated text. In streaming mode, prompt_token_ids is included only in the first chunk, and token_ids contains the delta tokens for each chunk. This is useful for debugging or when you need to map generated text back to input tokens.return_token_idsa/  If specified, the prefix cache will be salted with the provided string to prevent an attacker to guess prompts in multi-user environments. The salt should be random, protected from access by 3rd parties, and long enough to be unpredictable (e.g., 43 characters base64-encoded, corresponding to 256 bit).
cache_saltz5KVTransfer parameters used for disaggregated serving.kv_transfer_paramszWAdditional request parameters with string or numeric values, used by custom extensions.
vllm_xargsmodel_configreturnc              	   C   s0   t |j| jpd| j| jt| jo| j dddS )Nr   max_model_lenr#   )max_total_tokensmax_output_tokensr>   rE   needs_detokenizationmax_total_tokens_parammax_output_tokens_param)r   rS   r#   r>   rE   boolr   rM   )selfrQ    r[   a/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/entrypoints/openai/completion/protocol.pybuild_tok_params   s   z"CompletionRequest.build_tok_params)r4   r.   r/   r2   r3   _DEFAULT_SAMPLING_PARAMSdefault_sampling_paramsc                 C   sT   |d u ri }| j d ur| j nd}| j }d u r|dd}t||| j|| j| jdS )Nr$   r.   r5   )
beam_widthr#   r9   r.   r6   r8   )r%   r.   getr   r9   r6   r8   )rZ   r#   r_   r%   r.   r[   r[   r\   to_beam_search_params   s   z'CompletionRequest.to_beam_search_paramslogits_processor_patternc                 C   s  |d u ri }| j  }d u r|d| jd }| j }d u r&|d| jd }| j }d u r6|d| jd }| j }d u rF|d| jd }| j }d u rV|d| jd }| j}	|	d u rc| jrc| j	}	| joj| j
dk}
| j}|d urtttf  }|jdkrd|d< n5|jd	kr|j}|d usJ |j|d
< n!|jdkr|}|d urt|ttfsJ |jdd}t||d< t|dkr| jd u rtd%i |nt| jfi || _| jr| jni }| jr| j|d< tjd%i d| jd| j d| j!d|d|d|d|d|d| j"d| j#d| j$d| j	d| j%d|
s|ndd| j&d|	d| j'd| j(d| j)dt*| j+|d| j,d| j-rEt.j/nt.j0d | jd!| j1d"| j2d#|p_d d$dS d$dS )&Nr4   r.   r/   r2   r3   r   json_objectTjson_schemajsonstructural_tag)by_aliasrO   r%   r&   r   r)   r*   r7   r!   r9   r#   r$   r:   r@   r;   r<   r8   rK   r>   output_kindrG   r    r?   
extra_args
skip_cloner[   )3r4   ra   r^   r.   r/   r2   r3   r@   r   r!   r#   rF   dictstrr   typere   
isinstancer
   r   
model_dumprf   dumpslenrG   r   r   rP   rO   r   from_optionalr%   r&   r   r)   r*   r7   r9   r:   r;   r<   r8   r   rK   r>   r+   r   DELTA
FINAL_ONLYr    r?   )rZ   r#   rc   r_   r4   r.   r/   r2   r3   r@   echo_without_generationrF   structured_outputs_kwargsre   rg   	s_tag_objrj   r[   r[   r\   to_sampling_params   s   











	


z$CompletionRequest.to_sampling_paramsbefore)modec                    sJ   | dd d u r
|S |d  t fdddD }|dkr#tddd|S )NrG   c                 3   s    | ]
}  |d uV  qd S N)ra   ).0krw   r[   r\   	<genexpr>P  s
    
zCCompletionRequest.check_structured_outputs_count.<locals>.<genexpr>)rf   regexchoicer$   z^You can only use one kind of constraints for structured outputs ('json', 'regex' or 'choice').	parameter)ra   sumr   )clsdatacountr[   r   r\   check_structured_outputs_countI  s   z0CompletionRequest.check_structured_outputs_countc                 C   s   | d }d ur+| dr|dks|dkrtddd|dk r+|dkr+tdd|d| d	 }d ur?|dk r?td
d	|d|S )Nr@   r+   r   r=   z7`prompt_logprobs` are not available when `stream=True`.r   z1`prompt_logprobs` must be a positive value or -1.)r   valuer!   z$`logprobs` must be a positive value.ra   r   )r   r   r@   r!   r[   r[   r\   check_logprobs\  s&   z CompletionRequest.check_logprobsc                 C   s$   | dr| dstddd|S )Nr,   r+   z6Stream options can only be defined when `stream=True`.r   r   r   r   r[   r[   r\   validate_stream_optionsu  s   z)CompletionRequest.validate_stream_optionsc                 C   s`   | d}| d}|d u pt|to|dk}|d u p%t|to%t|dk}|r.|r.td|S )Nr   rA    r   z>Either prompt or prompt_embeds must be provided and non-empty.)ra   ro   rm   listrr   
ValueError)r   r   r   rA   prompt_is_emptyembeds_is_emptyr[   r[   r\   !validate_prompt_and_prompt_embeds  s   

z3CompletionRequest.validate_prompt_and_prompt_embedsc                 C   s0   | dd urt|d tr|d std|S )NrN   z>Parameter 'cache_salt' must be a non-empty string if provided.)ra   ro   rm   r   r   r[   r[   r\   check_cache_salt_support  s   z*CompletionRequest.check_cache_salt_supportr|   )P__name__
__module____qualname__r   rm   __annotations__r   r   intr   rY   r   floatr    rl   r!   r#   r%   r&   r   
_LONG_INFOminmaxr)   r*   r+   r,   r   r-   r.   r/   r0   r1   r2   r3   r4   r6   r7   r8   r9   r:   r;   r<   r>   r   r?   r@   rA   bytesrE   rF   r	   rG   r   rH   r   rJ   rK   r   rL   rM   rN   rO   r   rP   r   r   r]   r^   r   rb   r   ry   r   classmethodr   r   r   r   r   r[   r[   r[   r\   r   +   s   
 , "


l	r   c                   @   sv   e Zd ZU eedZee ed< eedZee	dB  ed< eedZ
ee ed< eedZeeee	f dB  ed< dS )CompletionLogProbsrI   text_offsetNtoken_logprobstokenstop_logprobs)r   r   r   r   r   r   r   r   r   r   r   rm   r   rl   r[   r[   r[   r\   r     s
   
 &r   c                   @   s   e Zd ZU eed< eed< dZedB ed< dZedB ed< e	dddZ
eeB dB ed< dZee dB ed	< dZeeeef dB  dB ed
< dZee dB ed< dS )CompletionResponseChoiceindextextNr!   finish_reasonThe stop string or token id that caused the completion to stop, None if the completion finished for some other reason including encountering the EOS tokenrB   stop_reason	token_idsr@   prompt_token_ids)r   r   r   r   r   rm   r!   r   r   r   r   r   r   r@   rl   r   r   r[   r[   r[   r\   r     s   
  r   c                   @   s   e Zd ZU edd dZeed< dZed ed< edd dZ	e
ed< eed	< ee ed
< dZed dB ed< dZedB ed< eed< edddZeeef dB ed< dS )CompletionResponsec                   C      dt   S Nzcmpl-r   r[   r[   r[   r\   <lambda>      zCompletionResponse.<lambda>r   idtext_completionobjectc                   C      t t S r|   r   timer[   r[   r[   r\   r     r   createdr   choicesN)autorC   flexscalerH   service_tiersystem_fingerprintusagezKVTransfer parameters.rB   rO   )r   r   r   r   r   rm   r   r   r   r   r   r   r   r   r   r   rO   rl   r   r[   r[   r[   r\   r     s   
 r   c                   @   s   e Zd ZU eed< eed< dZedB ed< dZedB ed< e	dddZ
eeB dB ed< dZee dB ed	< dZee dB ed
< dS )CompletionResponseStreamChoicer   r   Nr!   r   r   rB   r   r   r   )r   r   r   r   r   rm   r!   r   r   r   r   r   r   r   r[   r[   r[   r\   r     s   
 
r   c                   @   sp   e Zd ZU edd dZeed< dZeed< edd dZe	ed< eed	< e
e ed
< eddZedB ed< dS )CompletionStreamResponsec                   C   r   r   r   r[   r[   r[   r\   r     r   z!CompletionStreamResponse.<lambda>r   r   r   r   c                   C   r   r|   r   r[   r[   r[   r\   r     r   r   r   r   N)rC   r   )r   r   r   r   r   rm   r   r   r   r   r   r   r   r   r[   r[   r[   r\   r     s   
 r   )1rf   r   dataclassesr   typingr   r   r   torchpydanticr   r   vllm.configr   'vllm.entrypoints.openai.engine.protocolr	   r
   r   r   r   r   r   r   vllm.exceptionsr   vllm.loggerr   vllm.logprobsr   vllm.renderersr   vllm.sampling_paramsr   r   r   r   
vllm.utilsr   r   loggeriinfolongr   r   r   r   r   r   r   r[   r[   r[   r\   <module>   s0   (
  u