o
    
۾iI                     @   s  U d dl Z d dlZd dlZd dlmZ d dlZd dlZd dlZd dlZd dl	Z	d dl
mZ d dlmZ d dlmZ d dlmZ d dlZd dlmZmZ d dlmZ d dlmZ d d	lmZ d dlmZ d d
lmZ d dl m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z)m*Z* d dl+m,Z, d dl-m.Z. d dl/m0Z0m1Z1m2Z2m3Z3m4Z4 d dl5m6Z6 d dl7m8Z8 d dl9m:Z: d dl;m<Z<m=Z=m>Z>m?Z? d dl@mAZA d dlBmCZC d dlDmEZEmFZF d dlGmHZH d dlImJZJ d dlKmLZL d dlMmNZN d dlOmPZP d dlQmRZRmSZS d d lTmUZV ejWeXd!< eAd"ZYd#ZZe[eFd$f eXd%< eeLj\ddd&d'ed(eLd)e]dB d*e^e_ef dB d+ee! f
d,d-Z`eeLj\d.dd&d/ed(eLd)e]d*e^e_ef dB d+ee! f
d0d1Za	dMd'ed2e[d3 dB d+efd4d5Zb	dMd6e!d7ed'ed2e[d3 dB d+df
d8d9Zcd:e[e_edf d+ejfd;d<Zed=e_d+ejfd>d?Zfd@dA ZgeJdBdCdDdE ZhdNdFdGZi	dM	dNdHdIZjekdJkre<  eNdKdLZle)elZlelm Zne*en eoeien dS dS )O    N)	Namespace)AsyncIterator)asynccontextmanager)Any)FastAPIHTTPException)RequestValidationError)CORSMiddleware)State)AsyncEngineArgs)EngineClient)load_chat_template)
serve_http)RequestLogger)make_arg_parservalidate_parsed_serve_args)BaseModelPath)OpenAIServingModels)get_uvicorn_log_confighttp_exception_handlerlifespanlog_responsevalidation_exception_handler)sagemaker_standards_bootstrap)ScalingMiddleware)OpenAIServingTokenization)cli_env_setuplog_non_default_argslog_version_and_modelprocess_lora_modules)init_logger)ReasoningParserManager)POOLING_TASKSSupportedTask)ToolParserManager)
instrument)UsageContext)FlexibleArgumentParser)is_valid_ipv6_address)decorate_logs
set_ulimit)__version__prometheus_multiproc_dirz"vllm.entrypoints.openai.api_server)generate._FALLBACK_SUPPORTED_TASKSusage_context disable_frontend_multiprocessingclient_configargsr0   r1   r2   returnc             	   C  s   t ddkr!td td tdg t  td t	
| }|r6|dd|_|dd	|_|d u r?t| j}t||||d
4 I d H }|V  W d   I d H  d S 1 I d H s`w   Y  d S )NVLLM_WORKER_MULTIPROC_METHOD
forkserverz!Setup forkserver with pre-importszvllm.v1.engine.async_llmzForkserver setup complete!client_count   client_indexr   r/   )osgetenvloggerdebugmultiprocessingset_start_methodset_forkserver_preloadr6   ensure_runningr   from_cli_argsget_api_process_count_api_process_rankboolr1   *build_async_engine_client_from_engine_args)r3   r0   r1   r2   engine_argsengine rJ   V/home/ubuntu/.local/lib/python3.10/site-packages/vllm/entrypoints/openai/api_server.pybuild_async_engine_clientE   s,   




.rL   FrH   c          	   
   C  s   | j |d}|rtd ddlm} d}|rt|ni }|dd}|dd}z+|j||| j| j	| j
|||d	}|dusAJ | I dH  |V  W |rT|  dS dS |r]|  w w )
z
    Create EngineClient, either:
        - in-process using the AsyncLLMEngine Directly
        - multiprocess using AsyncLLMEngine RPC

    Returns the Client or None if the creation failed.
    )r0   z:V1 is enabled, but got --disable-frontend-multiprocessing.r   )AsyncLLMNr7   r8   r9   )vllm_configr0   enable_log_requestsaggregate_engine_loggingdisable_log_statsclient_addressesr7   r9   )create_engine_configr<   warningvllm.v1.engine.async_llmrM   dictpopfrom_vllm_configrO   rP   rQ   reset_mm_cacheshutdown)	rH   r0   r1   r2   rN   rM   	async_llmr7   r9   rJ   rJ   rK   rG   i   s8   

rG   supported_tasks)r#   .c                 C   s^  |d u rt jdtdd t}| jrtd d d td}n| jr%td d td}nttd}| |j_	ddl
m} || dd	lm} || dd
lm} || dd
lm} ||| d|v reddlm} || d|v rsdd
lm} || d|v rdd
lm}	 |	| tdd |D rddlm}
 |
|| | j|_|jt| j| j| j| jd |t t! |t"t# dd | j$pt%j&gD  }rddl'm(} |j||d | j)rddl'm*} || |t+ t%j,rt-.d |/dt0 | j/D ]3}|1dd\}}t2t34||}t56|r|| qt57|r!|/d| qt8d| dt9|}|S )NzThe 'supported_tasks' parameter was not provided to build_app and will be required in a future version. Defaulting to ('generate',).   
stacklevel)openapi_urldocs_url	redoc_urlr   )ra   rb   r   )r   r   )register_basic_api_routers)register_vllm_serve_api_routers)attach_routerr-   )register_generate_api_routerstranscriptionrealtimec                 s       | ]}|t v V  qd S Nr"   .0taskrJ   rJ   rK   	<genexpr>       zbuild_app.<locals>.<genexpr>)register_pooling_api_routers)allow_originsallow_credentialsallow_methodsallow_headersc                 S   s   g | ]}|r|qS rJ   rJ   )rm   keyrJ   rJ   rK   
<listcomp>   s    zbuild_app.<locals>.<listcomp>)AuthenticationMiddleware)tokens)XRequestIdMiddlewarez}CAUTION: Enabling log response in the API Server. This can include sensitive information and should be avoided in production.http.r8   zInvalid middleware z . Must be a function or a class.):warningswarnDeprecationWarningr.   disable_fastapi_docsr   r   enable_offline_docsstater3   (vllm.entrypoints.openai.basic.api_routerrc   vllm.entrypoints.serverd   )vllm.entrypoints.openai.models.api_routerre   %vllm.entrypoints.sagemaker.api_router+vllm.entrypoints.openai.generate.api_routerrf   1vllm.entrypoints.openai.speech_to_text.api_router+vllm.entrypoints.openai.realtime.api_routeranyvllm.entrypoints.poolingrq   	root_pathadd_middlewarer	   allowed_originsrs   allowed_methodsallowed_headersexception_handlerr   r   r   r   api_keyenvsVLLM_API_KEY$vllm.entrypoints.openai.server_utilsrx   enable_request_id_headersrz   r   "VLLM_DEBUG_LOG_API_SERVER_RESPONSEr<   rT   
middlewarer   rsplitgetattr	importlibimport_moduleinspectisclassiscoroutinefunction
ValueErrorr   )r3   r\   apprc   rd   register_models_api_routerregister_sagemaker_api_routerrf   "register_speech_to_text_api_routerregister_realtime_api_routerrq   ry   rx   rz   r   module_pathobject_nameimportedrJ   rJ   rK   	build_app   s   






r   engine_clientr   c              	      s  | j }|d u rtjdtdd t} jd ur j}n jg} jr)t j	d}nd } fdd|D }| |_
 j |_||_  |_t j}|jd urP|jjni }	t j|	}
t| ||
d|_|j I d H  t| |j|| j j jd|_d	|v rd
dlm} || | ||I d H  d|v rd
dlm} || | || d|v rd
dl m!} || | || t"dd |D rd
dl#m$} || | ||  j%|_%d
|_&d S )NzThe 'supported_tasks' parameter was not provided to init_app_state and will be required in a future version. Please pass 'supported_tasks' explicitly.r]   r^   )max_log_lenc                    s   g | ]	}t | jd qS ))name
model_path)r   model)rm   r   r3   rJ   rK   rw   0  s    z"init_app_state.<locals>.<listcomp>)r   base_model_pathslora_modules)request_loggerchat_templatechat_template_content_formattrust_request_chat_templatelog_error_stackr-   r   )init_generate_staterg   )init_transcription_staterh   )init_realtime_statec                 s   ri   rj   rk   rl   rJ   rJ   rK   ro   g  rp   z!init_app_state.<locals>.<genexpr>)init_pooling_state)'rN   r}   r~   r   r.   served_model_namer   rO   r   r   r   rQ   	log_statsr3   r   r   lora_configdefault_mm_lorasr   r   r   openai_serving_modelsinit_static_lorasr   r   r   r   openai_serving_tokenizationr   r   r   r   r   r   r   r   r   enable_server_load_trackingserver_load_metrics)r   r   r3   r\   rN   served_model_namesr   r   resolved_chat_templater   r   r   r   r   r   rJ   r   rK   init_app_state  sz   









r   addrc                 C   sZ   t j}t| d rt j}t j |t jd}|t jt jd |t jt jd |	|  |S )Nr   familytyper8   )
socketAF_INETr(   AF_INET6SOCK_STREAM
setsockopt
SOL_SOCKETSO_REUSEADDRSO_REUSEPORTbind)r   r   sockrJ   rJ   rK   create_server_socketp  s   
r   pathc                 C   s    t j t jt jd}||  |S )Nr   )r   AF_UNIXr   r   )r   r   rJ   rJ   rK   create_server_unix_socket}  s   
r   c                 C   sv   t  }| jr| j|vrtd| j dd| dt }| jj }r7||vr9td| dd| dd S d S )Nzinvalid tool call parser: z (chose from { ,z })zinvalid reasoning parser: )	r$   list_registeredenable_auto_tool_choicetool_call_parserKeyErrorjoinr!   structured_outputs_configreasoning_parser)r3   valid_tool_parsesvalid_reasoning_parsersr   rJ   rJ   rK   validate_api_server_args  s$   

r   zAPI server setup)	span_namec           	      C   s  t tt| j t|  | jrt| jdkrt| j | j	r+t| j	dkr+t
| j	 t|  | jr8t| j}n| jp<d| jf}t|}t  ddd}ttj| | jr`d| j }||fS |\}}| joi| j}t|rtd| d	n|pwd
}d|r}dnd d| d| }||fS )zRValidate API server args, set up signal handler, create socket
    ready to serve.    r4   Nc                  W   s   t d)N
terminated)KeyboardInterrupt)_rJ   rJ   rK   signal_handler  s   z$setup_server.<locals>.signal_handlerzunix:[]z0.0.0.0r{   sz://:r4   N)r   r<   VLLM_VERSIONr   r   tool_parser_pluginlenr$   import_tool_parserreasoning_parser_pluginr!   import_reasoning_parserr   udsr   hostportr   r*   signalSIGTERMssl_keyfilessl_certfiler(   )	r3   r   	sock_addrr   listen_addressr   r   is_ssl	host_partrJ   rJ   rK   setup_server  s,   
r  c                    s4   t d t| \}}t||| fi |I dH  dS )zRun a single-worker API server.	APIServerN)r)   r  run_server_worker)r3   uvicorn_kwargsr   r   rJ   rJ   rK   
run_server  s   r  c           
         s\  |j rt|j dkrt|j  |jr!t|jdkr!t|j t|}|dur-||d< t||d4 I dH Z}|	 I dH }t
d| t||}t||j||I dH  t
d|jjj|  t|f||j|j|j|j|j tj|j|j|j|j|j|j|j d|I dH }	W d  I dH  n1 I dH sw   Y  z|	I dH  W |!  dS |!  w )zRun a single API server worker.r   N
log_config)r2   zSupported tasks: %sz!Starting vLLM API server %d on %s)r   enable_ssl_refreshr   r   	log_level
access_logtimeout_keep_aliver   r   ssl_ca_certsssl_cert_reqsssl_ciphersh11_max_incomplete_event_sizeh11_max_header_count)"r   r   r$   r   r   r!   r   r   rL   get_supported_tasksr<   infor   r   r   rN   parallel_configrE   r   r  r   r   uvicorn_log_leveldisable_uvicorn_access_logr   VLLM_HTTP_TIMEOUT_KEEP_ALIVEr   r   r  r  r  r  r  close)
r   r   r3   r2   r  r  r   r\   r   shutdown_taskrJ   rJ   rK   r    s\   
(%r  __main__z*vLLM OpenAI-Compatible RESTful API server.)descriptionrj   r   )pr   r   r>   multiprocessing.forkserverr6   r:   r   r   tempfiler}   argparser   collections.abcr   
contextlibr   typingr   uvloopfastapir   r   fastapi.exceptionsr   fastapi.middleware.corsr	   starlette.datastructuresr
   	vllm.envsr   vllm.engine.arg_utilsr   vllm.engine.protocolr   vllm.entrypoints.chat_utilsr   vllm.entrypoints.launcherr   vllm.entrypoints.loggerr    vllm.entrypoints.openai.cli_argsr   r   'vllm.entrypoints.openai.models.protocolr   &vllm.entrypoints.openai.models.servingr   r   r   r   r   r   r   r   r   ,vllm.entrypoints.serve.elastic_ep.middlewarer   'vllm.entrypoints.serve.tokenize.servingr   vllm.entrypoints.utilsr   r   r   r   vllm.loggerr    vllm.reasoningr!   
vllm.tasksr"   r#   vllm.tool_parsersr$   vllm.tracingr%   vllm.usage.usage_libr&   vllm.utils.argparse_utilsr'   vllm.utils.network_utilsr(   vllm.utils.system_utilsr)   r*   vllm.versionr+   r   TemporaryDirectory__annotations__r<   r.   tupleOPENAI_API_SERVERrF   rV   strrL   rG   r   r   intr   r   r   r  r  r  __name__parser
parse_argsr3   runrJ   rJ   rJ   rK   <module>   s   

#5

{

[

,

;