o
    -is0                  
   @   s,  d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZmZ d dlZd dlmZ d dlmZmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZmZ d dlmZ d dl m!Z! erd dl"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z) ne*Z#e*Z%e*Z'e*Z)ee+Z,dZ-deddfddZ.dd Z/defddZ0dd Z1dd Z2	d=de3d e3dB d!e4e5ef dB de3dB fd"d#Z6de3dd$d%e3d&e4de3f
d'd(Z7d)eeB fd*d+Z8d,d-d.e9de:e9e9f fd/d0Z;d1e<e) d2e4e5e5f dB de<e) fd3d4Z=d5e5de5fd6d7Z>d8ed9e5d:e5ddfd;d<Z?dS )>    N)	Namespace)Logger)Template)TYPE_CHECKINGAny)Request)JSONResponseStreamingResponse)BackgroundTaskBackgroundTasks)envs)
EngineArgs)current_formatter_typeinit_logger)current_platform)FlexibleArgumentParser)ChatCompletionRequest)CompletionRequest)StreamOptionsLoRAModulePatha  For full list:            vllm {subcmd} --help=all
For a section:            vllm {subcmd} --help=ModelConfig    (case-insensitive)
For a flag:               vllm {subcmd} --help=max-model-len  (_ or - accepted)
Documentation:            https://docs.vllm.ai
requestreturnc                    sT   	 |   I dH }|d dkr)t| jjddr't| jjdr'| jj jd8  _dS q)	z+Returns if a disconnect message is receivedTNtypezhttp.disconnectenable_server_load_trackingFserver_load_metrics   )receivegetattrappstatehasattrr   )r   message r#   S/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/entrypoints/utils.pylisten_for_disconnect4   s   
r%   c                       t   fdd}|S )a  Decorator that allows a route handler to be cancelled by client
    disconnections.

    This does _not_ use request.is_disconnected, which does not work with
    middleware. Instead this follows the pattern from
    starlette.StreamingResponse, which simultaneously awaits on two tasks- one
    to wait for an http disconnect message, and the other to do the work that we
    want done. When the first task finishes, the other is cancelled.

    A core assumption of this method is that the body of the request has already
    been read. This is a safe assumption to make for fastapi handlers that have
    already parsed the body of the request into a pydantic model for us.
    This decorator is unsafe to use elsewhere, as it will consume and throw away
    all incoming messages for the request while it looks for a disconnect
    message.

    In the case where a `StreamingResponse` is returned by the handler, this
    wrapper will stop listening for disconnects and instead the response object
    will start listening for disconnects.
    c                     s   t | dkr| d n|d }t | i |}tt|}tj||gtjdI d H \}}|D ]}|  q1||v r@| S d S )Nr   raw_request)return_when)lenasynciocreate_taskr%   waitFIRST_COMPLETEDcancelresult)argskwargsr   handler_taskcancellation_taskdonependingtaskhandler_funcr#   r$   wrapper[   s   

z"with_cancellation.<locals>.wrapper	functoolswraps)r8   r9   r#   r7   r$   with_cancellationC   s   r=   c                 C   s   | j j jd8  _d S )Nr   )r   r    r   )r   r#   r#   r$   decrement_server_loadp   s   r>   c                    r&   )Nc                     sd  | dt| dkr| d nd }|d u rtdt|jjdds+ | i |I d H S t|jjds7d|jj_|jj jd7  _z | i |I d H }W n ty]   |jj jd8  _ w t	|t
tfr|jd u rrtt||_|S t	|jtr|jt| |S t	|jtrt }|j|jjg|jjR i |jj |t| ||_|S |jj jd8  _|S )Nr'   r   z9raw_request required when server load tracking is enabledr   Fr   r   )getr)   
ValueErrorr   r   r    r!   r   	Exception
isinstancer   r	   
backgroundr
   r>   r   add_taskfuncr0   r1   )r0   r1   r'   responsetasksrE   r#   r$   r9   u   sL    

z load_aware_call.<locals>.wrapperr:   )rE   r9   r#   rH   r$   load_aware_callt   s   +rI   c                   C   s&   dt jvrtd dt jd< d S d S )NVLLM_WORKER_MULTIPROC_METHODz/Setting VLLM_WORKER_MULTIPROC_METHOD to 'spawn'spawn)osenvironloggerdebugr#   r#   r#   r$   cli_env_setup   s   

rP   max_model_lentruncate_prompt_tokenstokenization_kwargsc                 C   sb   |d ur'|dkr
| }|| krt d| d|  d|d ur%d|d< ||d< |S |d ur/d|d< |S )	Nztruncate_prompt_tokens value (z!) is greater than max_model_len (z,). Please, select a smaller truncation size.T
truncation
max_lengthF)r@   )rQ   rR   rS   r#   r#   r$   _validate_truncation_size   s    rW   z)ChatCompletionRequest | CompletionRequestinput_lengthdefault_sampling_paramsc                 C   sD   t |dd p|j}| | }t|}tdd ||||dfD S )Nmax_completion_tokensc                 s   s    | ]	}|d ur|V  qd S Nr#   ).0valr#   r#   r$   	<genexpr>   s    z!get_max_tokens.<locals>.<genexpr>
max_tokens)r   r_   r   get_max_output_tokensminr?   )rQ   r   rX   rY   r_   default_max_tokensmax_output_tokensr#   r#   r$   get_max_tokens   s   
rd   r0   c           
      C   s   ddl m} i }t| tr/|t }t|g  D ]\}}|t| |kr-t| |||< qn8t| t	rct	| j
d}t| D ]}t| |j}t||j}	||	krV|||j< q?|j
t	j
krb|j
|d< ntdtd| d S )Nr   )make_arg_parser)modelrf   zDUnsupported argument type. Must be Namespace or EngineArgs instance.znon-default args: %s) vllm.entrypoints.openai.cli_argsre   rB   r   r   vars
parse_argsitemsr   r   rf   dataclassesfieldsname	TypeErrorrN   info)
r0   re   non_default_argsparserargdefaultdefault_argsfieldcurrent_valdefault_valr#   r#   r$   log_non_default_args   s0   




rx   stream_optionszStreamOptions | Noneenable_force_include_usagec                 C   s6   | r| j p|}|ot| j}||fS |d}}||fS )NF)include_usageboolcontinuous_usage_stats)ry   rz   r{   include_continuous_usager#   r#   r$   should_include_usage  s   

r   args_lora_modulesdefault_mm_lorasc                    sF   ddl m  | }|r! fdd| D }| d u r|}|S ||7 }|S )Nr   r   c                    s   g | ]
\}} ||d qS ))rm   pathr#   )r\   modality	lora_pathr   r#   r$   
<listcomp>  s    z(process_lora_modules.<locals>.<listcomp>)&vllm.entrypoints.openai.models.servingr   rj   )r   r   lora_modulesdefault_mm_lora_pathsr#   r   r$   process_lora_modules  s   
r   r"   c                 C   s   t dd| S )Nz at 0x[0-9a-f]+>>)resub)r"   r#   r#   r$   sanitize_message*  s   r   lgrversion
model_namec                 C   sb   t jst|  }d u rd}ntd}ddddd}|dkr#t|d	}||}| ||| d S )
Nz(vLLM server version %s, serving model %su+  
       ${w}█     █     █▄   ▄█${r}
 ${o}▄▄${r} ${b}▄█${r} ${w}█     █     █ ▀▄▀ █${r}  version ${w}%s${r}
  ${o}█${r}${b}▄█▀${r} ${w}█     █     █     █${r}  model   ${w}%s${r}
   ${b}▀▀${r}  ${w}▀▀▀▀▀ ▀▀▀▀▀ ▀     ▀${r}
z[97;1mz[93mz[94mz[0m)wobrcolor )r   VLLM_DISABLE_LOG_LOGOr   r   dictfromkeys
substitutero   )r   r   r   	formatterr"   logo_templatecolorsr#   r#   r$   log_version_and_model/  s   
r   r[   )@r*   rk   r;   rL   argparser   loggingr   stringr   typingr   r   regexr   fastapir   fastapi.responsesr   r	   starlette.backgroundr
   r   vllmr   vllm.engine.arg_utilsr   vllm.loggerr   r   vllm.platformsr   vllm.utils.argparse_utilsr   0vllm.entrypoints.openai.chat_completion.protocolr   +vllm.entrypoints.openai.completion.protocolr   'vllm.entrypoints.openai.engine.protocolr   'vllm.entrypoints.openai.models.protocolr   object__name__rN   VLLM_SUBCMD_PARSER_EPILOGr%   r=   r>   rI   rP   intr   strrW   rd   rx   r|   tupler   listr   r   r   r#   r#   r#   r$   <module>   s   -0




