o
    -i                     @   sH  U d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlZd dl	Z	d dl
Z
d dlZd dlZd dlZd dlmZ d dlmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlZd dlZd dlmZm Z m!Z!m"Z" d dl#m$Z$ d d	l%m&Z& d d
l'm(Z( d dl)m*Z* d dl+m,Z,m-Z-m.Z.m/Z/ d dl0m1Z1m2Z2m3Z3m4Z4m5Z5 d dl6m7Z7 d dl8m9Z9 d dl:m;Z; d dl<m=Z= d dl>m?Z? d dl@mAZA d dlBmCZC d dlDmEZEmFZFmGZG d dlHmIZI d dlJmKZKmLZL d dlMmNZN d dlOmPZPmQZQ d dlRmSZS d dlTmUZU d dlVmWZW d dlXmYZY d dlZm[Z[m\Z\ d dl]m^Z^ d dl_m`Z` d d lambZb d d!lcmdZdmeZemfZfmgZgmhZh d d"limjZj d d#lkmlZl d d$lmmnZn d d%lompZp d d&lqmrZr d d'lsmtZt d d(lumvZv d d)lwmxZx d d*lymzZzm{Z{ d d+l|m}Z~ ejed,< eld-Ze Zee j ed.< ed/e fd0d1Zeerjddd2d3ed4erd5edB d6eeef dB d7ee; f
d8d9Zeerjd:dd2d;e9d4erd5ed6eeef dB d7ee; f
d<d=Ze Zd>e"d7eSfd?d@Zd>e"d7ebfdAdBZd>e"d7e;fdCdDZedEd>e"fdFdGZedHdIdJ ZdKedB d7edB fdLdMZG dNdO dOZG dPdQ dQZdRed7efdSdTZG dUdV dVZdWed7dfdXdYZdWed7dfdZd[Zd3ed7e fd\d]ZdDe;d^e/d3ed7dfd_d`Zdaeeef d7ejfdbdcZdded7ejfdedfZdgdh Zdidj ZdrdkdlZ	ds	drdmdnZedokred  etdpdqZeKeZe ZeLe eee dS dS )t    N)	Namespace)AsyncIterator	Awaitable)asynccontextmanager)
HTTPStatus)Any)	APIRouterFastAPIHTTPExceptionRequest)RequestValidationError)CORSMiddleware)JSONResponseiterate_in_threadpool)URLHeadersMutableHeadersState)ASGIAppMessageReceiveScopeSend)AsyncEngineArgs)EngineClient)AnthropicServingMessages)load_chat_template)
serve_http)RequestLogger)DemoToolServerMCPToolServer
ToolServer)OpenAIServingChat)make_arg_parservalidate_parsed_serve_args)OpenAIServingCompletion)	ErrorInfoErrorResponse)OpenAIServing)BaseModelPath)OpenAIServingModels)OpenAIServingResponses)OpenAIServingTranscriptionOpenAIServingTranslation)ServingTokens)ScalingMiddleware)OpenAIServingTokenization)cli_env_setuplog_non_default_argslog_version_and_modelprocess_lora_modulessanitize_message)VLLMValidationError)init_logger)ReasoningParserManager)ToolParserManager)UsageContext)FlexibleArgumentParser)freeze_gc_heap)is_valid_ipv6_address)decorate_logs
set_ulimit)__version__prometheus_multiproc_dirz"vllm.entrypoints.openai.api_server_running_tasksappc                   s   zB| j jr"| j j  fdd}t| }t| |tj nd }t	  zd V  W |d ur4|
  n
|d ur>|
  w w W | ` d S | ` w )Nc                      s&   	 t tjI d H    I d H  qN)asynciosleepenvsVLLM_LOG_STATS_INTERVALdo_log_stats engine_clientrK   _/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/entrypoints/openai/api_server.py
_force_log`   s
   zlifespan.<locals>._force_log)state	log_statsrM   rF   create_taskrC   addadd_done_callbackremover=   cancel)rD   rO   taskrK   rL   rN   lifespanZ   s(   

rX   usage_context disable_frontend_multiprocessingclient_configargsrZ   r[   r\   returnc             	   C  s   t ddkr!td td tdg t  td t	
| }|r6|dd|_|dd	|_|d u r?t| j}t||||d
4 I d H }|V  W d   I d H  d S 1 I d H s`w   Y  d S )NVLLM_WORKER_MULTIPROC_METHOD
forkserverz!Setup forkserver with pre-importszvllm.v1.engine.async_llmzForkserver setup complete!client_count   client_indexr   rY   )osgetenvloggerdebugmultiprocessingset_start_methodset_forkserver_preloadr`   ensure_runningr   from_cli_argsget_api_process_count_api_process_rankboolr[   *build_async_engine_client_from_engine_args)r]   rZ   r[   r\   engine_argsenginerK   rK   rN   build_async_engine_clientx   s,   




.rt   Frr   c          	   
   C  s   | j |d}|rtd ddlm} d}|rt|ni }|dd}|dd}z+|j||| j| j	| j
|||d	}|dusAJ | I dH  |V  W |rT|  dS dS |r]|  w w )
z
    Create EngineClient, either:
        - in-process using the AsyncLLMEngine Directly
        - multiprocess using AsyncLLMEngine RPC

    Returns the Client or None if the creation failed.
    )rZ   z:V1 is enabled, but got --disable-frontend-multiprocessing.r   )AsyncLLMNra   rb   rc   )vllm_configrZ   enable_log_requestsaggregate_engine_loggingdisable_log_statsclient_addressesra   rc   )create_engine_configrf   warningvllm.v1.engine.async_llmru   dictpopfrom_vllm_configrw   rx   ry   reset_mm_cacheshutdown)	rr   rZ   r[   r\   rv   ru   	async_llmra   rc   rK   rK   rN   rq      s8   

rq   requestc                 C   s   t | S rE   )tokenizationr   rK   rK   rN   base      r   c                 C   
   | j jjS rE   )rD   rP   openai_serving_tokenizationr   rK   rK   rN   r         
r   c                 C   r   rE   )rD   rP   rM   r   rK   rK   rN   rM      r   rM   z/loadc                    s   t d| jjjidS )Nserver_loadcontent)r   rD   rP   server_load_metricsr   rK   rK   rN   get_server_load_metrics   s   r   z/versionc                     s   dt i} t| dS )Nversionr   )VLLM_VERSIONr   )verrK   rK   rN   show_version   s   
r   log_config_filec              
   C   sx   | sd S zt | }t|W  d    W S 1 sw   Y  W d S  ty; } ztd| | W Y d }~d S d }~ww )Nz0Failed to load log config from file %s: error %s)openjsonload	Exceptionrf   r|   )r   ferK   rK   rN   load_log_config   s   
(r   c                	   @   sZ   e Zd ZdZdedee ddfddZdede	fd	d
Z
dedededed fddZdS )AuthenticationMiddlewareaP  
    Pure ASGI middleware that authenticates each request by checking
    if the Authorization Bearer token exists and equals anyof "{api_key}".

    Notes
    -----
    There are two cases in which authentication is skipped:
        1. The HTTP method is OPTIONS.
        2. The request path doesn't start with /v1 (e.g. /health).
    rD   tokensr^   Nc                 C   s   || _ dd |D | _d S )Nc                 S   s    g | ]}t |d  qS )utf-8)hashlibsha256encodedigest).0trK   rK   rN   
<listcomp>  s     z5AuthenticationMiddleware.__init__.<locals>.<listcomp>)rD   
api_tokens)selfrD   r   rK   rK   rN   __init__  s   z!AuthenticationMiddleware.__init__headersc           	      C   sj   | d}|s	dS |d\}}}| dkrdS t|d }d}| jD ]
}|t	||O }q(|S )NAuthorizationF bearerr   )
rm   	partitionlowerr   r   r   r   r   secretscompare_digest)	r   r   authorization_header_valuescheme_param
param_hashtoken_match
token_hashrK   rK   rN   verify_token  s   

z%AuthenticationMiddleware.verify_tokenscopereceivesendc                 C   s   |d dvs|d dkr|  |||S |dd}t|dj|}t|d}|dr?| |s?td	d
idd}||||S |  |||S )Ntypehttp	websocketmethodOPTIONS	root_path r   z/v1errorUnauthorizedi  )r   status_code)	rD   rm   r   pathremoveprefixr   
startswithr   r   )r   r   r   r   r   url_pathr   responserK   rK   rN   __call__,  s   
z!AuthenticationMiddleware.__call__)__name__
__module____qualname____doc__r   liststrr   r   rp   r   r   r   r   r   r   rK   rK   rK   rN   r     s
    "r   c                	   @   s@   e Zd ZdZdeddfddZdeded	ede	d fd
dZ
dS )XRequestIdMiddlewarez
    Middleware the set's the X-Request-Id header for each response
    to a random uuid4 (hex) value if the header isn't already
    present in the request, otherwise use the provided request id.
    rD   r^   Nc                 C   s
   || _ d S rE   )rD   )r   rD   rK   rK   rN   r   B  r   zXRequestIdMiddleware.__init__r   r   r   c                    sJ   |d dvr|  ||S t|d dtdd f fdd}|  |||S )Nr   r   r   messager^   c                    sL   | d dkrt | d d} dt j}|d| | I dH  dS )zx
            Custom send function to mutate the response headers
            and append X-Request-Id to it.
            r   zhttp.response.startr   )rawzX-Request-IdN)r   rm   uuiduuid4hexappend)r   response_headers
request_idrequest_headersr   rK   rN   send_with_request_idL  s   z;XRequestIdMiddleware.__call__.<locals>.send_with_request_id)rD   r   r   )r   r   r   r   r   rK   r   rN   r   E  s
   
zXRequestIdMiddleware.__call__)r   r   r   r   r   r   r   r   r   r   r   rK   rK   rK   rN   r   ;  s    "r   
chunk_datac                 C   s&  zUddl m} ddlm} | ddkr1|| }|jr+|jd jjr.|jd jjW S W dS W dS | ddkrM|| }|jrP|jd j	rS|jd j	W S W dS W dS W dS  t
jy   d| v r| d r| d d }d|v r~|d d	r~|d d	  Y S |d
r|d
  Y S Y dS Y dS Y dS w )z0Extract content from a streaming response chunk.r   )ChatCompletionStreamResponse)CompletionStreamResponseobjectzchat.completion.chunktext_completionchoicesdeltar   textr   )0vllm.entrypoints.openai.chat_completion.protocolr   +vllm.entrypoints.openai.completion.protocolr   rm   model_validater   r   r   r   pydanticValidationError)r   r   r   chat_responsecompletion_responsechoicerK   rK   rN   _extract_content_from_chunkZ  sF   




r   c                   @   s`   e Zd ZdZdd Zdedee fddZdede	fd	d
Z
de	ddfddZde	fddZdS )
SSEDecoderz:Robust Server-Sent Events decoder for streaming responses.c                 C   s   d| _ g | _d S )Nr   )buffercontent_bufferr   rK   rK   rN   r   {  s   
zSSEDecoder.__init__chunkr^   c                 C   s   ddl }z|d}W n ty   g  Y S w |  j|7  _g }d| jv rn| jdd\}| _|d}|dri|dd  }|d	krM|d
di n|riz|	|}|d|d W n
 |j
yh   Y q w d| jv s%|S )z4Decode a chunk of SSE data and return parsed events.r   Nr   
rb   zdata:    z[DONE]r   donedata)r   r   )r   decodeUnicodeDecodeErrorr   splitrstripr   stripr   loadsJSONDecodeError)r   r   r   	chunk_streventslinedata_str
event_datarK   rK   rN   decode_chunk  s0   




zSSEDecoder.decode_chunkr  c                 C   s   t |S )z Extract content from event data.)r   )r   r  rK   rK   rN   extract_content  r   zSSEDecoder.extract_contentr   Nc                 C   s   |r
| j | dS dS )zAdd content to the buffer.N)r   r   )r   r   rK   rK   rN   add_content  s   zSSEDecoder.add_contentc                 C   s   d | jS )z"Get the complete buffered content.r   )joinr   r   rK   rK   rN   get_complete_content  s   zSSEDecoder.get_complete_content)r   r   r   r   r   bytesr   r~   r  r   r	  r
  r  rK   rK   rK   rN   r   x  s     r   response_bodyc                    sF   ddl m} t d  fdd}|| | _tdt dS )z/Log streaming response with robust SSE parsing.r   r   c                  3   s    D ]V}  d7  | V   | }|D ]E}|d dkr(|d }| q|d dkrX }|rNt|dkrC|d d d }	 td|    d S td    d S qqd S )	Nrb   r   r   r   i   r   z9response_body={streaming_complete: content=%r, chunks=%d}z9response_body={streaming_complete: no_content, chunks=%d})r  r	  r
  r  lenrf   info)r   r  eventr   full_contentchunk_countr  sse_decoderrK   rN   buffered_iterator  s:   

z2_log_streaming_response.<locals>.buffered_iteratorz,response_body={streaming_started: chunks=%d}N)starlette.concurrencyr   r   body_iteratorrf   r  r  )r   r  r   r  rK   r  rN   _log_streaming_response  s   "r  c                 C   s>   z| d   }td| W dS  ty   td Y dS w )zLog non-streaming response.r   zresponse_body={%s}zresponse_body={<binary_data>}N)r   rf   r  r   )r  decoded_bodyrK   rK   rN   _log_non_streaming_response  s   r  c                 C   sB  | j rtd d d td}n| jrtd d td}nttd}| |j_ddlm} || ddlm	} || ddl
m	} || ddlm	} || ddlm	} || ddlm	} || ddlm	} || ddlm}	 |	t |t | j|_ddlm}
 |
| |jt| j| j| j| jd	 |td
tdtfdd}|td
tdtfdd}dd | j pt!j"gD  }r|jt#|d | j$r|t% |t& t!j'rt()d |*ddtfdd}| j*D ]3}|+dd\}}t,t-.||}t/0|r|| qt/1|r|*d| qt2d| dt34|}|S )N)openapi_urldocs_url	redoc_urlrX   )r  r  rX   )rX   r   )register_vllm_serve_api_routers)attach_router)register_sagemaker_routes)register_pooling_api_routers)allow_originsallow_credentialsallow_methodsallow_headersr   excc                    s8   t tt|jt|jj|jdd}t| |jdS )N)r   r   coder   r   )	r(   r'   r6   detailr   r   phraser   
model_dump)r   r'  errrK   rK   rN   http_exception_handler#  s   
z)build_app.<locals>.http_exception_handlerc           
         s   d }|  }|D ]}d|v r%d|d v r%|d d }t|tr%|j} nq	t|}t|}|r>|r>||kr>| d| }n|}ttt|tj	j
tj	|dd}	t|	 tj	dS )Nctxr   r   )r   r   r(  r   r)  r*  )errors
isinstancer7   	parameterr   r(   r'   r6   r   BAD_REQUESTr,  r   r-  )
r   r'  r   r1  r   	ctx_errorexc_str
errors_strr   r.  rK   rK   rN   validation_exception_handler.  s0   
z/build_app.<locals>.validation_exception_handlerc                 S   s   g | ]}|r|qS rK   rK   )r   keyrK   rK   rN   r   L  s    zbuild_app.<locals>.<listcomp>)r   z}CAUTION: Enabling log response in the API Server. This can include sensitive information and should be avoided in production.r   r   c                    sz   || I d H }dd |j 2 I d H }tt||_ |jdd}|dk}|s.td |S |r7t|| |S t| |S )Nc                    s   g | z3 d H W }|q6 S rE   rK   )r   sectionrK   rK   rN   r   _  s    z3build_app.<locals>.log_response.<locals>.<listcomp>zcontent-typer   z text/event-stream; charset=utf-8zresponse_body={<empty>})	r  r   iterr   rm   rf   r  r  r  )r   	call_nextr   r  content_typeis_streamingrK   rK   rN   log_response\  s   

zbuild_app.<locals>.log_response.rb   zInvalid middleware z . Must be a function or a class.)5disable_fastapi_docsr	   rX   enable_offline_docsrP   r]   vllm.entrypoints.server  2vllm.entrypoints.openai.chat_completion.api_routerr   ,vllm.entrypoints.openai.responses.api_router/vllm.entrypoints.openai.translations.api_router-vllm.entrypoints.openai.completion.api_router%vllm.entrypoints.anthropic.api_router)vllm.entrypoints.openai.models.api_router!vllm.entrypoints.sagemaker.routesr!  routerinclude_routerr   vllm.entrypoints.poolingr"  add_middlewarer   allowed_originsr$  allowed_methodsallowed_headersexception_handlerr
   r   r   api_keyrH   VLLM_API_KEYr   enable_request_id_headersr   r0   "VLLM_DEBUG_LOG_API_SERVER_RESPONSErf   r|   
middlewarersplitgetattr	importlibimport_moduleinspectisclassiscoroutinefunction
ValueErrorsagemaker_standards	bootstrap)r]   rD   r  register_chat_api_routerregister_responses_api_router register_translations_api_routerregister_completion_api_routerregister_anthropic_api_routerregister_models_api_routerr!  r"  r/  r8  r   r?  rW  module_pathobject_nameimportedrK   rK   rN   	build_app  s|   







rk  rP   c                    s>  | j } jd ur j}n jg} jrt jd}nd } fdd|D }| |_ j |_||_  |_	| 
 I d H }td| t j} jdkr]t }	t|	tsUJ |	 I d H  n jrmt }	|	 jI d H  nd }	|jd urx|jjni }
|jd ur|jjni }
t j|
}t| ||d|_|j I d H  d|v rt| |j|| j j j j |	 j!j" j# j$ j% j&dnd |_'d|v rt(| |j j)f|| j j* j+ j j j, j  j!j" j# j$ j% j- j&d	nd |_.|j.d ur|j./ I d H  d|v rt0| |j| j j# j$ j&d
nd |_1t2| |j|| j j+ j&d|_3d|v r4t4| |j| j& j$dnd |_5d|v rHt6| |j| j& j$dnd |_7d|v rjt8| |j j)|| j j j j  j!j" j# j$dnd |_9d|v rt:| |j| j j& j# j% j;dnd |_<ddl=m>} || | I d H   j?|_?d|_@d S )N)max_log_lenc                    s   g | ]	}t | jd qS ))name
model_path)r*   model)r   rm  r]   rK   rN   r     s    z"init_app_state.<locals>.<listcomp>zSupported tasks: %sdemo)rM   base_model_pathslora_modulesgenerate)request_loggerchat_templatechat_template_content_formatreturn_tokens_as_token_idsenable_auto_toolstool_parsertool_serverreasoning_parserenable_prompt_tokens_detailsenable_force_include_usageenable_log_outputslog_error_stack)ru  rv  rw  default_chat_template_kwargstrust_request_chat_templaterx  ry  #exclude_tools_when_tool_choice_nonerz  r|  r}  r~  r  enable_log_deltasr  )ru  rx  r}  r~  r  )ru  rv  rw  r  r  transcription)ru  r  r~  )	ru  rv  rw  rx  ry  rz  r|  r}  r~  )ru  rx  r  r}  r  force_no_detokenizer   )init_pooling_state)Arv   served_model_namero  rw   r   rl  rM   ry   rQ   r]   get_supported_tasksrf   r  r   rv  r{  r    r2  init_and_validater!   add_tool_serverlora_configdefault_mm_lorasr5   rs  r+   openai_serving_modelsinit_static_lorasr,   rw  rx  enable_auto_tool_choicetool_call_parserstructured_outputs_configr|  r}  r~  r  r  openai_serving_responsesr#   response_roler  r  r  r  openai_serving_chatwarmupr&   openai_serving_completionr1   r   r-   openai_serving_transcriptionr.   openai_serving_translationr   anthropic_serving_messagesr/   tokens_onlyserving_tokensrM  r  enable_server_load_trackingr   )rM   rP   r]   rv   served_model_namesru  rr  supported_tasksresolved_chat_templater{  r  rs  r  rK   rp  rN   init_app_state  sB  






)







r  addrc                 C   sZ   t j}t| d rt j}t j |t jd}|t jt jd |t jt jd |	|  |S )Nr   familyr   rb   )
socketAF_INETr>   AF_INET6SOCK_STREAM
setsockopt
SOL_SOCKETSO_REUSEADDRSO_REUSEPORTbind)r  r  sockrK   rK   rN   create_server_socket?  s   
r  r   c                 C   s    t j t jt jd}||  |S )Nr  )r  AF_UNIXr  r  )r   r  rK   rK   rN   create_server_unix_socketL  s   
r  c                 C   sv   t  }| jr| j|vrtd| j dd| dt }| jj }r7||vr9td| dd| dd S d S )Nzinvalid tool call parser: z (chose from { ,z })zinvalid reasoning parser: )	r:   list_registeredr  r  KeyErrorr  r9   r  r|  )r]   valid_tool_parsesvalid_reasoning_parsersr|  rK   rK   rN   validate_api_server_argsR  s$   

r  c           	      C   s  t tt| j t|  | jrt| jdkrt| j | j	r+t| j	dkr+t
| j	 t|  | jr8t| j}n| jp<d| jf}t|}t  ddd}ttj| | jr`d| j }||fS |\}}| joi| j}t|rtd| d	n|pwd
}d|r}dnd d| d| }||fS )zRValidate API server args, set up signal handler, create socket
    ready to serve.   r   r^   Nc                  W   s   t d)N
terminated)KeyboardInterrupt)r   rK   rK   rN   signal_handler  r   z$setup_server.<locals>.signal_handlerzunix:[]z0.0.0.0r   sz://:r^   N)r4   rf   r   ro  r3   tool_parser_pluginr  r:   import_tool_parserreasoning_parser_pluginr9   import_reasoning_parserr  udsr  hostportr  r@   signalSIGTERMssl_keyfilessl_certfiler>   )	r]   r  	sock_addrr  listen_addressr  r  is_ssl	host_partrK   rK   rN   setup_serverd  s,   
r  c                    s4   t d t| \}}t||| fi |I dH  dS )zRun a single-worker API server.	APIServerN)r?   r  run_server_worker)r]   uvicorn_kwargsr  r  rK   rK   rN   
run_server  s   r  c           	         s@  |j rt|j dkrt|j  |jr!t|jdkr!t|j t|j}|dur.||d< t	||d4 I dH K}t
|}t||j|I dH  td|jjj|  t|f||j|j|j|j|j tj|j|j|j|j|j|j|j d|I dH }W d  I dH  n1 I dH sw   Y  z|I dH  W |!  dS |!  w )zRun a single API server worker.r  N
log_config)r\   z!Starting vLLM API server %d on %s)r  enable_ssl_refreshr  r  	log_level
access_logtimeout_keep_aliver  r  ssl_ca_certsssl_cert_reqsssl_ciphersh11_max_incomplete_event_sizeh11_max_header_count)"r  r  r:   r  r  r9   r  r   r   rt   rk  r  rP   rf   r  rv   parallel_configro   r   r  r  r  uvicorn_log_leveldisable_uvicorn_access_logrH   VLLM_HTTP_TIMEOUT_KEEP_ALIVEr  r  r  r  r  r  r  close)	r  r  r]   r\   r  r  rM   rD   shutdown_taskrK   rK   rN   r    sX   
(#r  __main__z*vLLM OpenAI-Compatible RESTful API server.)descriptionr  rE   )rF   r   rZ  r\  r   rh   multiprocessing.forkserverr`   rd   r   r  r  tempfiler   argparser   collections.abcr   r   
contextlibr   r   r   typingr   +model_hosting_container_standards.sagemaker	sagemakerr`  r   uvloopfastapir   r	   r
   r   fastapi.exceptionsr   fastapi.middleware.corsr   fastapi.responsesr   r  r   starlette.datastructuresr   r   r   r   starlette.typesr   r   r   r   r   	vllm.envsrH   vllm.engine.arg_utilsr   vllm.engine.protocolr   "vllm.entrypoints.anthropic.servingr   vllm.entrypoints.chat_utilsr   vllm.entrypoints.launcherr   vllm.entrypoints.loggerr    vllm.entrypoints.mcp.tool_serverr    r!   r"   /vllm.entrypoints.openai.chat_completion.servingr#    vllm.entrypoints.openai.cli_argsr$   r%   *vllm.entrypoints.openai.completion.servingr&   'vllm.entrypoints.openai.engine.protocolr'   r(   &vllm.entrypoints.openai.engine.servingr)   'vllm.entrypoints.openai.models.protocolr*   &vllm.entrypoints.openai.models.servingr+   )vllm.entrypoints.openai.responses.servingr,   ,vllm.entrypoints.openai.translations.servingr-   r.   %vllm.entrypoints.serve.disagg.servingr/   ,vllm.entrypoints.serve.elastic_ep.middlewarer0   'vllm.entrypoints.serve.tokenize.servingr1   vllm.entrypoints.utilsr2   r3   r4   r5   r6   vllm.exceptionsr7   vllm.loggerr8   vllm.reasoningr9   vllm.tool_parsersr:   vllm.usage.usage_libr;   vllm.utils.argparse_utilsr<   vllm.utils.gc_utilsr=   vllm.utils.network_utilsr>   vllm.utils.system_utilsr?   r@   vllm.versionrA   r   TemporaryDirectory__annotations__rf   setrC   TaskrX   OPENAI_API_SERVERrp   r~   r   rt   rq   rK  r   r   rM   rm   r   r   r   r   r   r   r   r   r  r  rk  r  tupleintr  r  r  r  r  r  r   parser
parse_argsr]   runrK   rK   rK   rN   <module>   s  

#4
05-	 
 A
,

9