o
    i-                     @   s   d Z ddlZddlZddlZddlZddlZddlZddlZddlZddl	m
Z
 ddlmZ ddlZddlmZ ddlmZmZ ddlmZ e
dd	 Zd
d Zdejdeeef ddfddZdejfddZdejfddZdS )aB  Benchmark the cold and warm startup time of vLLM models.

This script measures total startup time (including model loading, compilation,
and cache operations) for both cold and warm scenarios:
- Cold startup: Fresh start with no caches (temporary cache directories)
- Warm startup: Using cached compilation and model info
    N)contextmanager)Any)tqdm)#convert_to_pytorch_benchmark_formatwrite_to_json)
EngineArgsc               
   c   s    ddl m}  tjd}tjdd}z5|tjd< |   dV  W d   n1 s*w   Y  W tj|dd |r@|tjd< dS tj	dd dS tj|dd |rX|tjd< w tj	dd w )	z
    Context manager to measure cold startup time:
    1. Uses a temporary directory for vLLM cache to avoid any pollution
       between cold startup iterations.
    2. Uses inductor's fresh_cache to clear torch.compile caches.
    r   )fresh_cacheVLLM_CACHE_ROOTvllm_startup_bench_cold_)prefixNT)ignore_errors)
torch._inductor.utilsr   osenvirongettempfilemkdtempshutilrmtreepop)r   original_cache_roottemp_cache_dir r   M/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/benchmarks/startup.pycold_startup    s$   
r   c           	   
   C   s   z?ddl m} t }|di t| }t | }d}t|jdr5|jj}t|dr5|j	dur5|j	j
}|||d W dS  ty^ } z|d |t| W Y d}~dS d}~ww )	z
    Run LLM startup in a subprocess and return timing metrics via a queue.
    This ensures complete isolation between iterations.
    r   )LLMg        vllm_configcompilation_configN)total_startup_timecompilation_timer   )vllmr   timeperf_counterdataclassesasdicthasattr
llm_enginer   r   r   put	Exceptionstr)	engine_argsresult_queuer   
start_timellmr   r   r   er   r   r   run_startup_in_subprocess:   s,   


r/   argsresultsreturnc                 C   s  t j| jd }t| d|d gi|d |d dd}|r&t| d| t| d|d gi|d	 |d
 dd}|rCt| d| t| d|d gi|d |d dd}|r`t| d| t| d|d gi|d |d dd}|rt| d| d S d S )Nr   avg_cold_startup_timecold_startup_timescold_startup_percentiles)r4   r5   )r0   metrics
extra_infoz.cold_startup.pytorch.jsonavg_cold_compilation_timecold_compilation_timescold_compilation_percentiles)r9   r:   z.cold_compilation.pytorch.jsonavg_warm_startup_timewarm_startup_timeswarm_startup_percentiles)r<   r=   z.warm_startup.pytorch.jsonavg_warm_compilation_timewarm_compilation_timeswarm_compilation_percentiles)r?   r@   z.warm_compilation.pytorch.json)r   pathsplitextoutput_jsonr   r   )r0   r1   	base_namecold_startup_recordscold_compilation_recordswarm_startup_recordswarm_compilation_recordsr   r   r    save_to_pytorch_benchmark_format`   s\   









rI   parserc                 C   sV   | j dtddd | j dtddd | j dtdd	d | j d
td dd t| } | S )Nz--num-iters-cold   z"Number of cold startup iterations.)typedefaulthelpz--num-iters-warmup   z>Number of warmup iterations before benchmarking warm startups.z--num-iters-warmz"Number of warm startup iterations.z--output-jsonz5Path to save the startup time results in JSON format.)add_argumentintr)   r   add_cli_args)rJ   r   r   r   rR      s4   
rR   c                    s  t jddd t|   fdd}dtjd< td td	 g }g }tt| j	d
dD ]&}t
  | }||d  ||d  W d    n1 sMw   Y  q,td tt| jddD ]}|  q`td g }g }tt| jddD ]}| }||d  ||d  qwt|}	t|}
t|}t|}t|	}t|
}t|}t|}g d}t|	|}t|
|}t||}t||}td td td td td|dd td|dd td t||D ]\}}td| d|dd qtd t||D ]\}}td| d|dd qtd td|dd td|dd td t||D ]\}}td| d|dd qEtd t||D ]\}}td| d|dd q`td | jrt|t|||tt|| tt|| t|t|||tt|| tt|| d }t| jd!}tj||d"d# W d    n	1 sw   Y  t| | d S d S )$NspawnT)forcec                     st   t  } t jt | fd}|  |  |  s6|  }|du r4|  s0|  }td| td|S td)z
        Create LLM instance in a subprocess and measure startup time.
        Returns timing metrics, using subprocess for complete isolation.
        )targetr0   NzSubprocess failed: z$Subprocess failed with unknown errorz"Subprocess did not return a result)	multiprocessingQueueProcessr/   startjoinemptyr   RuntimeError)r+   processresult	error_msgr*   r   r   create_llm_and_measure_startup   s$   z,main.<locals>.create_llm_and_measure_startup0VLLM_ENABLE_V1_MULTIPROCESSINGzESetting VLLM_ENABLE_V1_MULTIPROCESSING=0 to collect startup metrics.
zMeasuring cold startup time...
zCold startup iterations)descr   r   z,
Warming up for warm startup measurement...
zWarmup iterationsz 
Measuring warm startup time...
zWarm startup iterations)
      2   K   Z   c   z=
============================================================zSTARTUP TIME BENCHMARK RESULTSz<============================================================z
COLD STARTUP:zAvg total startup time: z.2fz secondszAvg compilation time:   zStartup time percentiles:z  z%: zCompilation time percentiles:z
WARM STARTUP:)r3   r8   r4   r9   r5   r:   r;   r>   r<   r?   r=   r@   w   )indent)rV   set_start_methodr   from_cli_argsr   r   printr   rangenum_iters_coldr   appendnum_iters_warmupnum_iters_warmnparraymean
percentileziprC   floatdicttolistopenjsondumprI   )r0   ra   r4   r9   ir6   _r<   r?   cold_startup_arraycold_compilation_arraywarm_startup_arraywarm_compilation_arrayavg_cold_startupavg_cold_compilationavg_warm_startupavg_warm_compilationpercentagesr5   r:   r=   r@   
percentagery   r1   fr   r`   r   main   s   









r   )__doc__argparser#   r   rV   r   r   r   r!   
contextlibr   typingr   numpyrv   r   vllm.benchmarks.lib.utilsr   r   vllm.engine.arg_utilsr   r   r/   	Namespacer|   r)   rI   ArgumentParserrR   r   r   r   r   r   <module>   s6   
&

>