o
    -iE                     @   s0  d dl Z d dlZd dlmZ d dlmZmZmZ d dlZd dl	m
Z
mZ d dlmZmZmZmZmZmZmZmZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZmZ d d
l m!Z! ee"Z#e$ej%Z&G dd deZ'G dd deZ(G dd deZ)G dd deZ*G dd deZ+G dd deZ,dS )    N)replace)	AnnotatedAnyLiteral)Fieldmodel_validator)AnyResponseFormat!LegacyStructuralTagResponseFormatLogitsProcessorsOpenAIBaseModelStreamOptionsStructuralTagResponseFormat	UsageInfoget_logits_processors)VLLMValidationError)init_logger)Logprob)BeamSearchParamsRequestOutputKindSamplingParamsStructuredOutputsParamsrandom_uuidc                	   @   s(  e Zd ZU dZedB ed< dZee eee  B eB ee B dB ed< dZ	e
dB ed< dZedB ed< dZeeef dB ed< dZedB ed	< d
ZedB ed< dZeed< dZedB ed< edejejdZedB ed< g Zeee B dB ed< dZe
dB ed< dZedB ed< dZedB ed< dZedB ed< dZedB ed< dZedB ed< dZ e
ed< dZ!edB ed< dZ"edB ed< dZ#edB ed< dZ$eed< g Z%ee dB ed< dZ&e
ed< dZ'e
ed < d!Z(eed"< d#Z)e
ed$< d#Z*e
ed%< dZ+e,eed&ejdf dB ed'< dZ-ee dB ed(< dZ.edB ed)< dZ/e0ee0 B dB ed*< ed#d+d,Z1e
ed-< edd.d,Z2e3dB ed/< edd0d,Z4e5dB ed1< ed!d2d,Z6eed3< ee7d4d5Z8eed6< edd7d,Z9e:dB ed8< edd9d,Z;e
dB ed:< edd;d,Z<e
dB ed<< edd=d,Z=edB ed>< edd?d,Z>eee?f dB ed@< eddAd,Z@eeeeB eB f dB edB< dddd!ddCZAeedD< 	dXdedEedB dFeBfdGdHZC	dXdedIedB dEedB dFeDfdJdKZEeFdLdMeGdNdO ZHeFdLdMeGdPdQ ZIeFdLdMeGdRdS ZJeFdLdMeGdTdU ZKeFdLdMeGdVdW ZLdS )YCompletionRequestNmodelpromptFechog        frequency_penalty
logit_biaslogprobs   
max_tokens   npresence_penalty)geleseedstopstreamstream_optionssuffixtemperaturetop_puseruse_beam_searchtop_kmin_prepetition_penalty      ?length_penaltystop_token_idsinclude_stop_str_in_output
ignore_eosr   
min_tokensTskip_special_tokensspaces_between_special_tokenstruncate_prompt_tokensallowed_token_idsprompt_logprobsprompt_embedszMIf true (the default), special tokens (e.g. BOS) will be added to the prompt.defaultdescriptionadd_special_tokenszSimilar to chat completion, this parameter specifies the format of output. Only {'type': 'json_object'}, {'type': 'json_schema'}, {'type': 'structural_tag'}, or {'type': 'text' } is supported.response_formatz(Additional kwargs for structured outputsstructured_outputszThe priority of the request (lower means earlier handling; default: 0). Any priority other than 0 will raise an error if the served model does not use priority scheduling.priorityzThe request_id related to this request. If the caller does not set it, a random_uuid will be generated. This id is used through out the inference process and return in response.)default_factoryrB   
request_ida  A list of either qualified names of logits processors, or constructor objects, to apply when sampling. A constructor is a JSON object with a required 'qualname' field specifying the qualified name of the processor class/factory, and optional 'args' and 'kwargs' fields containing positional and keyword arguments. For example: {'qualname': 'my_module.MyLogitsProcessor', 'args': [1, 2], 'kwargs': {'param': 'value'}}.logits_processorszIf specified with 'logprobs', tokens are represented  as strings of the form 'token_id:{token_id}' so that tokens that are not JSON-encodable can be identified.return_tokens_as_token_idsa(  If specified, the result will include token IDs alongside the generated text. In streaming mode, prompt_token_ids is included only in the first chunk, and token_ids contains the delta tokens for each chunk. This is useful for debugging or when you need to map generated text back to input tokens.return_token_idsa/  If specified, the prefix cache will be salted with the provided string to prevent an attacker to guess prompts in multi-user environments. The salt should be random, protected from access by 3rd parties, and long enough to be unpredictable (e.g., 43 characters base64-encoded, corresponding to 256 bit).
cache_saltz5KVTransfer parameters used for disaggregated serving.kv_transfer_paramszWAdditional request parameters with string or numeric values, used by custom extensions.
vllm_xargs)r2   r,   r-   r0   r1   _DEFAULT_SAMPLING_PARAMSdefault_sampling_paramsreturnc                 C   sT   |d u ri }| j d ur| j nd}| j }d u r|dd}t||| j|| j| jdS )Nr"   r,   r3   )
beam_widthr!   r7   r,   r4   r6   )r#   r,   getr   r7   r4   r6   )selfr!   rP   r#   r,    rU   h/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/entrypoints/openai/completion/protocol.pyto_beam_search_params   s   z'CompletionRequest.to_beam_search_paramslogits_processor_patternc                 C   s  |d u ri }| j  }d u r|d| jd }| j }d u r&|d| jd }| j }d u r6|d| jd }| j }d u rF|d| jd }| j }d u rV|d| jd }| j}	|	d u rc| jrc| j	}	| joj| j
dk}
| j}|d urtttf  }|jdkrd|d< n5|jd	kr|j}|d usJ |j|d
< n!|jdkr|}|d urt|ttfsJ |jdd}t||d< t|dkr| jd u rtd%i |nt| jfi || _| jr| jni }| jr| j|d< tjd%i d| jd| j d| j!d|d|d|d|d|d| j"d| j#d| j$d| j	d| j%d|
s|ndd| j&d|	d| j'd| j(d| j)dt*| j+|d| j,d| j-rEt.j/nt.j0d | jd!| j1d"| j2d#|p_d d$dS d$dS )&Nr2   r,   r-   r0   r1   r   json_objectTjson_schemajsonstructural_tag)by_aliasrM   r#   r$   r   r'   r(   r5   r   r7   r!   r"   r8   r>   r9   r:   r6   rI   r<   output_kindrE   r   r=   
extra_args
skip_clonerU   )3r2   rS   rO   r,   r-   r0   r1   r>   r   r   r!   rD   dictstrr   typerZ   
isinstancer	   r   
model_dumpr[   dumpslenrE   r   r   rN   rM   r   from_optionalr#   r$   r   r'   r(   r5   r7   r8   r9   r:   r6   r   rI   r<   r)   r   DELTA
FINAL_ONLYr   r=   )rT   r!   rX   rP   r2   r,   r-   r0   r1   r>   echo_without_generationrD   structured_outputs_kwargsrZ   r\   	s_tag_objr_   rU   rU   rV   to_sampling_params   s   











	


z$CompletionRequest.to_sampling_paramsbefore)modec                    sJ   | dd d u r
|S |d  t fdddD }|dkr#tddd|S )NrE   c                 3   s    | ]
}  |d uV  qd S N)rS   ).0krl   rU   rV   	<genexpr>F  s
    
zCCompletionRequest.check_structured_outputs_count.<locals>.<genexpr>)r[   regexchoicer"   z^You can only use one kind of constraints for structured outputs ('json', 'regex' or 'choice').	parameter)rS   sumr   )clsdatacountrU   rt   rV   check_structured_outputs_count?  s   z0CompletionRequest.check_structured_outputs_countc                 C   s   | d }d ur+| dr|dks|dkrtddd|dk r+|dkr+tdd|d| d	 }d ur?|dk r?td
d	|d|S )Nr>   r)   r   r;   z7`prompt_logprobs` are not available when `stream=True`.rx   z1`prompt_logprobs` must be a positive value or -1.)ry   valuer   z$`logprobs` must be a positive value.rS   r   )r{   r|   r>   r   rU   rU   rV   check_logprobsR  s&   z CompletionRequest.check_logprobsc                 C   s$   | dr| dstddd|S )Nr*   r)   z6Stream options can only be defined when `stream=True`.rx   r   r{   r|   rU   rU   rV   validate_stream_optionsk  s   z)CompletionRequest.validate_stream_optionsc                 C   s`   | d}| d}|d u pt|to|dk}|d u p%t|to%t|dk}|r.|r.td|S )Nr   r?    r   z>Either prompt or prompt_embeds must be provided and non-empty.)rS   rd   rb   listrg   
ValueError)r{   r|   r   r?   prompt_is_emptyembeds_is_emptyrU   rU   rV   !validate_prompt_and_prompt_embedsv  s   

z3CompletionRequest.validate_prompt_and_prompt_embedsc                 C   s0   | dd urt|d tr|d std|S )NrL   z>Parameter 'cache_salt' must be a non-empty string if provided.)rS   rd   rb   r   r   rU   rU   rV   check_cache_salt_support  s   z*CompletionRequest.check_cache_salt_supportrq   )M__name__
__module____qualname__r   rb   __annotations__r   r   intr   boolr   floatr   ra   r   r!   r#   r$   r   
_LONG_INFOminmaxr'   r(   r)   r*   r   r+   r,   r-   r.   r/   r0   r1   r2   r4   r5   r6   r7   r8   r9   r:   r<   r   r=   r>   r?   bytesrC   rD   r   rE   r   rF   r   rH   rI   r
   rJ   rK   rL   rM   r   rN   rO   r   rW   r   rn   r   classmethodr~   r   r   r   r   rU   rU   rU   rV   r   ,   s   
 , "

l	r   c                   @   sv   e Zd ZU eedZee ed< eedZee	dB  ed< eedZ
ee ed< eedZeeee	f dB  ed< dS )CompletionLogProbsrG   text_offsetNtoken_logprobstokenstop_logprobs)r   r   r   r   r   r   r   r   r   r   r   rb   r   ra   rU   rU   rU   rV   r     s
   
 &r   c                   @   s   e Zd ZU eed< eed< dZedB ed< dZedB ed< e	dddZ
eeB dB ed< dZee dB ed	< dZeeeef dB  dB ed
< dZee dB ed< dS )CompletionResponseChoiceindextextNr   finish_reasonThe stop string or token id that caused the completion to stop, None if the completion finished for some other reason including encountering the EOS tokenr@   stop_reason	token_idsr>   prompt_token_ids)r   r   r   r   r   rb   r   r   r   r   r   r   r   r>   ra   r   r   rU   rU   rU   rV   r     s   
  r   c                   @   s   e Zd ZU edd dZeed< dZed ed< edd dZ	e
ed< eed	< ee ed
< dZed dB ed< dZedB ed< eed< edddZeeef dB ed< dS )CompletionResponsec                   C      dt   S Nzcmpl-r   rU   rU   rU   rV   <lambda>      zCompletionResponse.<lambda>r   idtext_completionobjectc                   C      t t S rq   r   timerU   rU   rU   rV   r     r   createdr   choicesN)autorA   flexscalerF   service_tiersystem_fingerprintusagezKVTransfer parameters.r@   rM   )r   r   r   r   r   rb   r   r   r   r   r   r   r   r   r   r   rM   ra   r   rU   rU   rU   rV   r     s   
 r   c                   @   s   e Zd ZU eed< eed< dZedB ed< dZedB ed< e	dddZ
eeB dB ed< dZee dB ed	< dZee dB ed
< dS )CompletionResponseStreamChoicer   r   Nr   r   r   r@   r   r   r   )r   r   r   r   r   rb   r   r   r   r   r   r   r   r   rU   rU   rU   rV   r     s   
 
r   c                   @   sp   e Zd ZU edd dZeed< dZeed< edd dZe	ed< eed	< e
e ed
< eddZedB ed< dS )CompletionStreamResponsec                   C   r   r   r   rU   rU   rU   rV   r     r   z!CompletionStreamResponse.<lambda>r   r   r   r   c                   C   r   rq   r   rU   rU   rU   rV   r     r   r   r   r   N)rA   r   )r   r   r   r   r   rb   r   r   r   r   r   r   r   r   rU   rU   rU   rV   r     s   
 r   )-r[   r   dataclassesr   typingr   r   r   torchpydanticr   r   'vllm.entrypoints.openai.engine.protocolr   r	   r
   r   r   r   r   r   vllm.exceptionsr   vllm.loggerr   vllm.logprobsr   vllm.sampling_paramsr   r   r   r   
vllm.utilsr   r   loggeriinfolongr   r   r   r   r   r   r   rU   rU   rU   rV   <module>   s,   (
  j