o
    -iw,                     @   sZ  d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	m
Z
mZ d dlmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZ d dlmZm Z  d dl!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z(m)Z) ee*Z+dZ,G dd deZ-de.e fddZ/de j0fddZ1de j0fddZ2	d	dddZ3dS )     N)CLISubcommand)
run_serverrun_server_workersetup_server)make_arg_parservalidate_parsed_serve_args)VLLM_SUBCMD_PARSER_EPILOG)init_logger)UsageContext)FlexibleArgumentParser)get_tcp_uri)decorate_logsset_process_title)EngineCoreProc)CoreEngineProcManagerlaunch_core_engines)Executor)MultiprocExecutor)setup_multiprocess_prometheus)APIServerProcessManagerwait_for_completion_or_failurea0  Launch a local OpenAI-compatible API server to serve LLM
completions via HTTP. Defaults to Qwen/Qwen3-0.6B if no model is specified.

Search by using: `--help=<ConfigGroup>` to explore options by section (e.g.,
--help=ModelConfig, --help=Frontend)
  Use `--help=all` to show all available flags at once.
c                   @   sT   e Zd ZdZdZedejddfddZdejddfdd	Z	d
ej
defddZdS )ServeSubcommandz(The `serve` subcommand for the vLLM CLI.serveargsreturnNc                 C   s  t | dr| jd ur| j| _| jr'| jd ur$| jdkr$td| j dd| _| jp.| jd u}| jp6| j	d u}|r?|r?td| jd u ro|rJd| _n%|r_| j
pPd| _| jdkr^td| j n| j| _| jdkrotd| j | jdk rzt|  d S | jdkrt|  d S tt|  d S )	N	model_tagr   z--api-server-count=zN cannot be used with --headless (no API servers are started in headless mode).a  Cannot use both external and hybrid data parallel load balancing modes. External LB is enabled via --data-parallel-external-lb or --data-parallel-rank. Hybrid LB is enabled via --data-parallel-hybrid-lb or --data-parallel-start-rank. Use one mode or the other.   zPDefaulting api_server_count to data_parallel_size_local (%d) for hybrid LB mode.z7Defaulting api_server_count to data_parallel_size (%d).)hasattrr   modelheadlessapi_server_count
ValueErrordata_parallel_external_lbdata_parallel_rankdata_parallel_hybrid_lbdata_parallel_start_rankdata_parallel_size_localloggerinfodata_parallel_sizerun_headlessrun_multi_api_serveruvlooprunr   )r   is_external_lbis_hybrid_lb r0   W/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/entrypoints/cli/serve.pycmd/   sL   




zServeSubcommand.cmdc                 C   s   t | d S N)r   )selfr   r0   r0   r1   validateq   s   zServeSubcommand.validate
subparsersc                 C   s0   |j | jdtdd}t|}tj| jd|_|S )NzNLaunch a local OpenAI-compatible API server to serve LLM completions via HTTP.z vllm serve [model_tag] [options])helpdescriptionusage)subcmd)
add_parsernameDESCRIPTIONr   r   formatepilog)r4   r6   serve_parserr0   r0   r1   subparser_initt   s   zServeSubcommand.subparser_init)__name__
__module____qualname____doc__r<   staticmethodargparse	Namespacer2   r5   _SubParsersActionr   rA   r0   r0   r0   r1   r   *   s    Ar   r   c                   C   s   t  gS r3   )r   r0   r0   r0   r1   cmd_init   s   rJ   r   c                    sb  | j dkr	tdtj| }tj}|j|dd}|jr td|j	}|j
}|dkr.tdd  fd	d
}ttj| ttj| |jdkroddlm} |j}| d|j }	td||	 t|dd}
|
jdd d S |j}|j}t||}td|| ttj||j	jd|d|t||j  d	}z|!  W td |"  d S td |"  w )Nr   z.api_server_count can't be set in headless modeT)usage_contextr   z:data_parallel_hybrid_lb is not applicable in headless moder   z5data_parallel_size_local must be > 0 in headless modeFc                    s   t d|   sd td S )NzReceived %d signal.T)r'   debug
SystemExit)signumframeshutdown_requestedr0   r1   signal_handler   s
   z$run_headless.<locals>.signal_handler)__version__:zpLaunching vLLM (v%s) headless multiproc executor, with head node address %s for torch.distributed process group.)monitor_workers)inlinezQLaunching %d data parallel engine(s) in headless mode, with head node address %s.)		target_fnlocal_engine_countstart_indexlocal_start_indexvllm_configlocal_clienthandshake_addressexecutor_class	log_statszShutting down.)#r    r!   vllmAsyncEngineArgsfrom_cli_argsr
   OPENAI_API_SERVERcreate_engine_configr$   parallel_configr&   signalSIGTERMSIGINTnode_rank_within_dpvllm.versionrS   master_addrmaster_portr'   r(   r   start_worker_monitordata_parallel_master_ipdata_parallel_rpc_portr   r   r   run_engine_corer#   r   	get_classdisable_log_stats
join_firstclose)r   engine_argsrK   r[   re   rX   rR   VLLM_VERSIONhosthead_node_addressexecutorportr]   engine_managerr0   rP   r1   r*      sl   






r*   c                 C   sT  | j rJ | j}|dksJ |dkrt  t| \}}tj| }||_d|_t	j
}|j|d}|dkr;tjr;tdt|}|j }|j}	|	j}
|	jsS|
dksSJ d }t||||-\}}}tt||| ||j|j|rp| nd d}|
dks{|	jstd	i |}W d    n1 sw   Y  |d u r|j|d< td	i |}t|||d d S )
Nr   r   )rK   zIVLLM_ALLOW_RUNTIME_LORA_UPDATING cannot be used with api_server_count > 1)target_server_fnlisten_addresssockr   num_serversinput_addressesoutput_addressesstats_update_addressr   )api_server_managerr{   coordinatorr0   )r   r    r   r   r`   ra   rb   _api_process_count_api_process_rankr
   rc   rd   envs VLLM_ALLOW_RUNTIME_LORA_UPDATINGr!   r   rq   rr   re   r#   local_engines_onlyr   dictrun_api_server_worker_procinputsoutputsget_stats_publish_addressr   frontend_stats_publish_addressr   )r   num_api_serversr~   r   ru   rK   r[   r^   r_   re   dp_rankr   local_engine_managerr   	addressesapi_server_manager_kwargsr0   r0   r1   r+      sd   



r+   c                 K   sH   |pi }| dd}tdt| t  tt| |||fi | dS )z6Entrypoint for individual API server worker processes.client_indexr   	APIServerN)getr   strr   r,   r-   r   )r~   r   r   client_configuvicorn_kwargsserver_indexr0   r0   r1   r   !  s   r   r3   )r   N)4rG   rf   r,   r`   	vllm.envsr   vllm.entrypoints.cli.typesr   "vllm.entrypoints.openai.api_serverr   r   r    vllm.entrypoints.openai.cli_argsr   r   vllm.entrypoints.utilsr   vllm.loggerr	   vllm.usage.usage_libr
   vllm.utils.argparse_utilsr   vllm.utils.network_utilsr   vllm.utils.system_utilsr   r   vllm.v1.engine.corer   vllm.v1.engine.utilsr   r   vllm.v1.executorr   #vllm.v1.executor.multiproc_executorr   vllm.v1.metrics.prometheusr   vllm.v1.utilsr   r   rB   r'   r=   r   listrJ   rH   r*   r+   r   r0   r0   r0   r1   <module>   s<   	ZQI