import asyncio
import dataclasses
import functools
import os
from argparse import Namespace
from logging import Logger
from string import Template
from typing import TYPE_CHECKING

import regex as re
from fastapi import Request
from fastapi.responses import JSONResponse, StreamingResponse
from starlette.background import BackgroundTask, BackgroundTasks

from vllm import envs
from vllm.engine.arg_utils import EngineArgs
from vllm.logger import current_formatter_type, init_logger
from vllm.platforms import current_platform
from vllm.utils.argparse_utils import FlexibleArgumentParser

if TYPE_CHECKING:
    from vllm.entrypoints.openai.engine.protocol import StreamOptions
    from vllm.entrypoints.openai.models.protocol import LoRAModulePath
else:
    StreamOptions = object
    LoRAModulePath = object

logger = init_logger(__name__)

VLLM_SUBCMD_PARSER_EPILOG = (
    "For full list:            vllm {subcmd} --help=all\n"
    "For a section:            vllm {subcmd} --help=ModelConfig    (case-insensitive)\n"
    "For a flag:               vllm {subcmd} --help=max-model-len  (_ or - accepted)\n"
    "Documentation:            https://docs.vllm.ai\n"
)
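# The `{subcmd}` placeholder is filled in by each CLI subcommand before the
# epilog is attached to its parser, e.g. (hypothetical call site):
#     parser.epilog = VLLM_SUBCMD_PARSER_EPILOG.format(subcmd="serve")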
requestreturnc                    sT   	 |   I dH }|d dkr)t| jjddr't| jjdr'| jj jd8  _dS q)	z+Returns if a disconnect message is receivedTNtypezhttp.disconnectenable_server_load_trackingFserver_load_metrics   )receivegetattrappstatehasattrr   )r   message r    J/home/ubuntu/.local/lib/python3.10/site-packages/vllm/entrypoints/utils.pylisten_for_disconnect*   s   
r"   c                       t   fdd}|S )a  Decorator that allows a route handler to be cancelled by client
    disconnections.

    This does _not_ use request.is_disconnected, which does not work with
    middleware. Instead this follows the pattern from
    starlette.StreamingResponse, which simultaneously awaits on two tasks: one
    to wait for an http disconnect message, and the other to do the work that we
    want done. When the first task finishes, the other is cancelled.

    A core assumption of this method is that the body of the request has already
    been read. This is a safe assumption to make for fastapi handlers that have
    already parsed the body of the request into a pydantic model for us.
    This decorator is unsafe to use elsewhere, as it will consume and throw away
    all incoming messages for the request while it looks for a disconnect
    message.

    In the case where a `StreamingResponse` is returned by the handler, this
    wrapper will stop listening for disconnects and instead the response object
    will start listening for disconnects.
    """

    @functools.wraps(handler_func)
    async def wrapper(*args, **kwargs):
        # By convention the fastapi Request is the second positional argument
        # or else is passed as the `raw_request` keyword argument.
        request = args[1] if len(args) > 1 else kwargs["raw_request"]

        handler_task = asyncio.create_task(handler_func(*args, **kwargs))
        cancellation_task = asyncio.create_task(listen_for_disconnect(request))

        done, pending = await asyncio.wait([handler_task, cancellation_task],
                                           return_when=asyncio.FIRST_COMPLETED)
        for task in pending:
            task.cancel()

        if handler_task in done:
            return handler_task.result()
        return None

    return wrapper
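

# A minimal usage sketch (hypothetical route, not part of this module): the
# decorator sits between the route registration and the handler, and the
# handler must accept the raw Request so the wrapper can watch it.
#
#     @router.post("/v1/completions")
#     @with_cancellation
#     async def create_completion(request: CompletionRequest,
#                                 raw_request: Request):
#         ...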
z"with_cancellation.<locals>.wrapper	functoolswraps)r5   r6   r    r4   r!   with_cancellation9   s   r:   c                 C   s   | j j jd8  _d S )Nr   )r   r   r   )r   r    r    r!   decrement_server_loadf   s   r;   c                    r#   )Nc                     sd  | dt| dkr| d nd }|d u rtdt|jjdds+ | i |I d H S t|jjds7d|jj_|jj jd7  _z | i |I d H }W n ty]   |jj jd8  _ w t	|t
tfr|jd u rrtt||_|S t	|jtr|jt| |S t	|jtrt }|j|jjg|jjR i |jj |t| ||_|S |jj jd8  _|S )Nr$   r   z9raw_request required when server load tracking is enabledr   Fr   r   )getr&   
ValueErrorr   r   r   r   r   	Exception
isinstancer   r   
backgroundr	   r;   r
   add_taskfuncr-   r.   )r-   r.   r$   responsetasksrB   r    r!   r6   k   sL    

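

# Usage sketch (hypothetical route; the stacking order shown is illustrative):
# combining both decorators keeps the load counter accurate even when a
# handler is cancelled mid-flight, since listen_for_disconnect also decrements
# the counter on disconnect.
#
#     @router.post("/v1/chat/completions")
#     @with_cancellation
#     @load_aware_call
#     async def create_chat_completion(request: ChatCompletionRequest,
#                                      raw_request: Request):
#         ...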


def cli_env_setup():
    if "VLLM_WORKER_MULTIPROC_METHOD" not in os.environ:
        logger.debug("Setting VLLM_WORKER_MULTIPROC_METHOD to 'spawn'")
        os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"


def get_max_tokens(max_model_len: int, max_tokens: int | None,
                   input_length: int, default_sampling_params: dict) -> int:
    # The tightest of: room left in the context window, the request's own
    # max_tokens, the platform's output cap, and the server default.
    default_max_tokens = max_model_len - input_length
    max_output_tokens = current_platform.get_max_output_tokens(input_length)

    return min(val
               for val in (default_max_tokens, max_tokens, max_output_tokens,
                           default_sampling_params.get("max_tokens"))
               if val is not None)
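

# Worked example with made-up numbers: max_model_len=8192 and
# input_length=1000 leave 7192 tokens of context; with a request
# max_tokens=4096, a platform cap of 16384, and no "max_tokens" key in
# default_sampling_params, the result is min(7192, 4096, 16384) == 4096.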


def log_non_default_args(args: Namespace | EngineArgs):
    # Imported here rather than at module scope to keep the CLI parser out of
    # the module import path.
    from vllm.entrypoints.openai.cli_args import make_arg_parser

    non_default_args = {}

    # Handle argparse.Namespace: diff against a parser run with no arguments.
    if isinstance(args, Namespace):
        parser = make_arg_parser(FlexibleArgumentParser())
        for arg, default in vars(parser.parse_args([])).items():
            if default != getattr(args, arg):
                non_default_args[arg] = getattr(args, arg)
    # Handle an EngineArgs instance: diff against a default-constructed one.
    elif isinstance(args, EngineArgs):
        default_args = EngineArgs(model=args.model)
        for field in dataclasses.fields(default_args):
            current_val = getattr(args, field.name)
            default_val = getattr(default_args, field.name)
            if current_val != default_val:
                non_default_args[field.name] = current_val
        # The model was pinned while building default_args above, so check it
        # against the EngineArgs class default separately.
        if args.model != EngineArgs.model:
            non_default_args["model"] = args.model
    else:
        raise TypeError("Unsupported argument type. "
                        "Must be Namespace or EngineArgs instance.")

    logger.info("non-default args: %s", non_default_args)
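

# Typically invoked once at server startup with the parsed CLI namespace,
# e.g. (hypothetical call site): log_non_default_args(parser.parse_args())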


def should_include_usage(
        stream_options: "StreamOptions | None",
        enable_force_include_usage: bool) -> tuple[bool, bool]:
    if stream_options:
        include_usage = (stream_options.include_usage
                         or enable_force_include_usage)
        include_continuous_usage = include_usage and bool(
            stream_options.continuous_usage_stats)
    else:
        include_usage = enable_force_include_usage
        include_continuous_usage = False

    return include_usage, include_continuous_usage
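

# For example (assuming a StreamOptions model with these two fields):
#     should_include_usage(
#         StreamOptions(include_usage=True, continuous_usage_stats=True),
#         enable_force_include_usage=False) == (True, True)
#     should_include_usage(None, enable_force_include_usage=True)
#         == (True, False)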


def process_lora_modules(
    args_lora_modules: list["LoRAModulePath"],
    default_mm_loras: dict[str, str] | None,
) -> list["LoRAModulePath"]:
    from vllm.entrypoints.openai.models.serving import LoRAModulePath

    lora_modules = args_lora_modules
    if default_mm_loras:
        # Each default multimodal LoRA is keyed by modality; expose it as a
        # LoRA module named after that modality.
        default_mm_lora_paths = [
            LoRAModulePath(name=modality, path=lora_path)
            for modality, lora_path in default_mm_loras.items()
        ]
        if args_lora_modules is None:
            lora_modules = default_mm_lora_paths
        else:
            lora_modules += default_mm_lora_paths
    return lora_modules


def sanitize_message(message: str) -> str:
    # Drop CPython object addresses (" at 0x...>" suffixes in reprs) so the
    # message is stable across runs and processes.
    return re.sub(r" at 0x[0-9a-f]+>", ">", message)
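

# For example:
#     sanitize_message("<Task finished coro=<foo() done> at 0x7f01beef1234>")
#     -> "<Task finished coro=<foo() done>>"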


def log_version_and_model(lgr: Logger, version: str, model_name: str) -> None:
    # Skip the ASCII-art logo when it is disabled explicitly or when a custom
    # log formatter is configured, and fall back to a plain one-line banner.
    formatter = current_formatter_type()
    if envs.VLLM_DISABLE_LOG_LOGO or formatter is not None:
        logo_template = Template("vLLM server version %s, serving model %s")
    else:
        logo_template = Template("""
       ${w}█     █     █▄   ▄█${r}
 ${o}▄▄${r} ${b}▄█${r} ${w}█     █     █ ▀▄▀ █${r}  version ${w}%s${r}
  ${o}█${r}${b}▄█▀${r} ${w}█     █     █     █${r}  model   ${w}%s${r}
   ${b}▀▀${r}  ${w}▀▀▀▀▀ ▀▀▀▀▀ ▀     ▀${r}
""")

    # ANSI colors for the logo: w = bright white, o = yellow/orange, b = blue,
    # r = reset. Blanked out unless the "color" formatter is active.
    colors = {
        "w": "\033[97;1m",
        "o": "\033[93m",
        "b": "\033[94m",
        "r": "\033[0m",
    }
    if formatter != "color":
        colors = dict.fromkeys(colors, "")

    logo = logo_template.substitute(colors)
    lgr.info(logo, version, model_name)