o
    iT                  
   @   s  U d dl Z d dlZd dlmZmZ d dlmZ d dlmZ d dl	m
Z
mZmZ d dlmZ d dlmZ d dlmZmZ d dlmZmZ d d	lmZ d d
lmZmZ d dlmZ d dlmZ d dl m!Z! d dl"m#Z#m$Z$m%Z%m&Z&m'Z' d dl(m)Z)m*Z* ee+Z,eee-gdf Z.e/d Z0e0e.B Z1G dd deZ2de3e1 fddZ4G dd de2Z5G dd de2Z6G dd de6e5Z7G dd de5Z8G dd de5Z9ee
B eB Z:ee;d< d e:d!e3e- d"e<de=e-e:f fd#d$Z>d%e3e- d&e-de3e- fd'd(Z?d&e-de3e- fd)d*Z@G d+d, d,ZAdS )-    N)ABCabstractmethod)Callable)	TypeAlias)CounterGauge	Histogram)CUDAGraphLogging)SupportsMetricsInfo
VllmConfig)KVConnectorLoggingKVConnectorPrometheus)init_logger)STAT_LOGGER_PLUGINS_GROUPload_plugins_by_group)FinishReason)PerfMetricsLogging)unregister_vllm_metrics)CachingMetricsIterationStatsMultiModalCacheStatsPromptTokenStatsSchedulerStats)SpecDecodingLoggingSpecDecodingPromStatLoggerBaseAggregateStatLoggerBasec                
   @   s~   e Zd ZdZeddedefddZe		ddedB d	e	dB d
e
dB defddZedd Zdd ZdedefddZdS )r   a   Interface for logging metrics.

    API users may define custom loggers that implement this interface.
    However, note that the `SchedulerStats` and `IterationStats` classes
    are not considered stable interfaces and may change in future versions.
    r   vllm_configengine_indexc                 C      d S N )selfr   r   r!   r!   M/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/v1/metrics/loggers.py__init__0      zStatLoggerBase.__init__Nscheduler_statsiteration_statsmm_cache_stats
engine_idxc                 C   r   r    r!   r"   r&   r'   r(   r)   r!   r!   r#   record3   s   zStatLoggerBase.recordc                 C   r   r    r!   r"   r!   r!   r#   log_engine_initialized<   r%   z%StatLoggerBase.log_engine_initializedc                 C   r   r    r!   r,   r!   r!   r#   log?   r%   zStatLoggerBase.logis_awakelevelc                 C   r   r    r!   )r"   r/   r0   r!   r!   r#   record_sleep_stateB   r%   z!StatLoggerBase.record_sleep_stater   Nr   )__name__
__module____qualname____doc__r   r   intr$   r   r   r   r+   r-   r.   r1   r!   r!   r!   r#   r   (   s(    
returnc                  C   sR   g } t t D ]\}}t|trt|ts!td|d|d| | q| S )NzStat logger plugin z+ must be a subclass of StatLoggerBase (got z).)	r   r   items
isinstancetype
issubclassr   	TypeErrorappend)	factoriesnameplugin_classr!   r!   r#   !load_stat_logger_plugin_factoriesF   s   rC   c                   @   s*   e Zd ZdZededee fddZdS )r   zNAbstract base class for loggers that
    aggregate across multiple DP engines.r   engine_indexesc                 C   r   r    r!   r"   r   rD   r!   r!   r#   r$   [   r%   z AggregateStatLoggerBase.__init__N)	r4   r5   r6   r7   r   r   listr8   r$   r!   r!   r!   r#   r   W   s    c                	   @   s   e Zd Zd"dedefddZdd Zdefd	d
Zde	fddZ
dededefddZedd Z		d#dedB de	dB dedB defddZdd Zdd Zdd Zd d! ZdS )$LoggingStatLoggerr   r   r   c                 C   s   || _ || _| t  t | _t | _t | _	t | _
t | _| jj}t|| _d | _| jjjr>t| jjj| jjj| _d| _d| _d| _d| _|  rUt|| _d S d S )N        F)r   r   _resettime	monotonicr   last_scheduler_statsr   prefix_caching_metrics connector_prefix_caching_metricsmm_caching_metricsr   spec_decoding_loggingkv_transfer_configr   kv_connector_loggingcudagraph_loggingobservability_configcudagraph_metricsr	   compilation_configcudagraph_modecudagraph_capture_sizeslast_prompt_throughputlast_generation_throughputengine_is_idle
aggregated_enable_perf_statsr   perf_metrics_logging)r"   r   r   rQ   r!   r!   r#   r$   `   s.   

zLoggingStatLogger.__init__c                 C   s"   || _ d| _d| _d| _d| _d S r3   )last_log_timenum_prompt_tokensnum_generation_tokensnum_corrupted_reqsnum_preemptions)r"   nowr!   r!   r#   rI   ~   s
   
zLoggingStatLogger._resetr9   c                 C   s
   | j jjS r    )r   rT   enable_mfu_metricsr,   r!   r!   r#   r]      s   
z$LoggingStatLogger._enable_perf_statsr'   c                 C   sF   |  j |jj7  _ |  j|j7  _|  j|j7  _|  j|j7  _d S r    )r`   prompt_token_statscomputedra   rb   rc   num_preempted_reqs)r"   r'   r!   r!   r#   _track_iteration_stats   s   z(LoggingStatLogger._track_iteration_statstracked_statsrd   c                 C   s"   || j  }|dkrdS t|| S )NrH   )r_   float)r"   rj   rd   
delta_timer!   r!   r#   _get_throughput   s   
z!LoggingStatLogger._get_throughputc                 C   s   d | jS )NzEngine {:03d}: )formatr   r,   r!   r!   r#   
log_prefix   s   zLoggingStatLogger.log_prefixNr&   r(   r)   c                 C   s   |r|  | |dur[| j|j |jdur| j|j |jdur*| j|j |j }r5| j	| | j
durF|jdurF| j
|j | jsL|| _|j }r[|  r[| j| |re| j| dS dS )zLog Stats to standard output.N)ri   rM   observeprefix_cache_statsconnector_prefix_cache_statsrN   spec_decoding_statsrP   kv_connector_statsrR   rS   cudagraph_statsr\   rL   
perf_statsr]   r^   rO   )r"   r&   r'   r(   r)   rt   rv   r!   r!   r#   r+      s,   





zLoggingStatLogger.recordc                 C   sV   t  }| | j|}| | j|}| | t||| j| jf | _	|| _|| _d S r    )
rJ   rK   rm   r`   ra   rI   anyrY   rZ   r[   )r"   rd   prompt_throughputgeneration_throughputr!   r!   r#   _update_stats   s   

zLoggingStatLogger._update_statsc                 C   r   r    r!   r,   r!   r!   r#   aggregate_scheduler_stats      z+LoggingStatLogger.aggregate_scheduler_statsc                 C   sd  |    |   | jrtjntj}g d}| j| j| jj	| jj
g}| jdkr1|d || j |ddg || jjd | jjd g tjrU|d || j | jjsg|d || jjd  | jjsy|d	 || jjd  || jd
| g|R   | jj|d | jj|d | jd ur| jj|d |  r| jj|| jd d S d S )N)z$Avg prompt throughput: %.1f tokens/sz(Avg generation throughput: %.1f tokens/szRunning: %d reqszWaiting: %d reqsr   zPreemptions: %dzGPU KV cache usage: %.1f%%zPrefix cache hit rate: %.1f%%d   zCorrupted: %d reqsz&External prefix cache hit rate: %.1f%%zMM cache hit rate: %.1f%%z, )log_fn)r~   ro   )rz   r{   r[   loggerdebuginforY   rZ   rL   num_running_reqsnum_waiting_reqsrc   r?   extendkv_cache_usagerM   hit_rateenvsVLLM_COMPUTE_NANS_IN_LOGITSrb   rN   emptyrO   ro   joinrP   r.   rR   rS   r]   r^   )r"   r~   	log_partslog_argsr!   r!   r#   r.      sV   







zLoggingStatLogger.logc                 C   s(   | j jjrtd| j| j jj d S d S )NzSEngine %03d: vllm cache_config_info with initialization after num_gpu_blocks is: %d)r   cache_confignum_gpu_blocksr   r   r   r,   r!   r!   r#   r-     s   
z(LoggingStatLogger.log_engine_initializedr2   r3   )r4   r5   r6   r   r8   r$   rI   boolr]   r   ri   rk   rm   propertyro   r   r   r+   rz   r{   r.   r-   r!   r!   r!   r#   rG   _   s.    	

#:rG   c                	   @   s   e Zd Zdedee fddZedd Zde	fdd	Z
	
	dded
B ded
B ded
B defddZdd Zdd Zdd Zd
S )AggregatedLoggingStatLoggerr   rD   c                 C   s2   || _ dd | j D | _tj| |dd d| _d S )Nc                 S   s   i | ]}|t  qS r!   )r   .0idxr!   r!   r#   
<dictcomp>   s    z8AggregatedLoggingStatLogger.__init__.<locals>.<dictcomp>)r   T)rD   last_scheduler_stats_dictrG   r$   r\   rE   r!   r!   r#   r$     s   
z$AggregatedLoggingStatLogger.__init__c                 C   s   d t| jS )Nz{} Engines Aggregated: )rn   lenrD   r,   r!   r!   r#   ro   &  s   z&AggregatedLoggingStatLogger.log_prefixr9   c                 C   s   dS )NFr!   r,   r!   r!   r#   r]   *  r|   z.AggregatedLoggingStatLogger._enable_perf_statsNr   r&   r'   r(   r)   c                 C   sH   || j vrtd| d S tj| ||||d |d ur"|| j|< d S d S NzUnexpected engine_idx: %dr(   r)   )rD   r   warningrG   r+   r   r*   r!   r!   r#   r+   .  s   
z"AggregatedLoggingStatLogger.recordc                 C   sh   t  | _| j D ]}| j j|j7  _| j j|j7  _| j j|j7  _q	| j jt| j  _d S r    )r   rL   r   valuesr   r   r   r   )r"   rL   r!   r!   r#   r{   B  s   z5AggregatedLoggingStatLogger.aggregate_scheduler_statsc                 C   s   t |  d S r    )rG   r.   r,   r!   r!   r#   r.   P  s   zAggregatedLoggingStatLogger.logc                 C   s,   | j jjrtdt| j| j jj d S d S )NzR%d Engines: vllm cache_config_info with initialization after num_gpu_blocks is: %d)r   r   r   r   r   r   rD   r,   r!   r!   r#   r-   S  s   
z2AggregatedLoggingStatLogger.log_engine_initializedr3   )r4   r5   r6   r   rF   r8   r$   r   ro   r   r]   r   r   r   r+   r{   r.   r-   r!   r!   r!   r#   r     s.    


r   c                	   @   sf   e Zd Zdedee deddfddZ		dd	edB d
e	dB de
dB defddZdd Zdd ZdS )PerEngineStatLoggerAdapterr   rD   per_engine_stat_logger_factoryr9   Nc                 C   s*   i | _ || _|D ]
}|||| j |< qd S r    )per_engine_stat_loggersrD   )r"   r   rD   r   r   r!   r!   r#   r$   ^  s   z#PerEngineStatLoggerAdapter.__init__r   r&   r'   r(   r)   c                 C   s6   || j vrtd| d S | j | j||||d d S r   )r   r   r   r+   r*   r!   r!   r#   r+   k  s   


z!PerEngineStatLoggerAdapter.recordc                 C      | j  D ]}|  qd S r    )r   r   r.   r"   per_engine_stat_loggerr!   r!   r#   r.   |     
zPerEngineStatLoggerAdapter.logc                 C   r   r    )r   r   r-   r   r!   r!   r#   r-     r   z1PerEngineStatLoggerAdapter.log_engine_initializedr3   )r4   r5   r6   r   rF   r8   PerEngineStatLoggerFactoryr$   r   r   r   r+   r.   r-   r!   r!   r!   r#   r   ]  s.    

r   c                	   @   s   e Zd ZeZeZeZe	Z
eZ	ddedee dB fddZdedefdd	Z		
ddedB dedB dedB defddZddedefddZdd ZdS )PrometheusStatLoggerNr   rD   c           *         s0  |d u rdg}|| _ t  || _|jj| _|jj| _ddg}|jj|jj	}fdd|D }| 
|j||| _| |||| _| jddd|d	}t||| _| jd
dd|d	}t||| _| jdd|dg ddi | _g d}|D ]fdd|D | j< qu|   | jddd|d	}	t|	|| _tjr| jdd|d}
t|
|| _| jdd|d}t||| _| jdd|d}t||| _| jdd|d}t||| _| jdd|d}t||| _| jdd |d}t||| _| jd!d"|d}t||| _| jd#d$|d}t||| _ | jd%d&|d}t||| _!| jd'd(|d)g d i | _"t#j$D ] fd*d|D | j"< q5| jd+d,|d}t||| _%| jd-d.|d}t||| _&| jd/d0|d}t||| _'i | _(| jd1d2|d3g dt)D ]fd4d|D | j(< q| j*d5d&t+||d6}t||| _,| j*d7d0t+||d6}t||| _-| j*d8d9g d:|d6}t||| _.| j*d;d<t+||d6}t||| _/| j*d=d>g d?|d6}t||| _0| j*d@dAt+||d6}t||| _1| j*dBdCg dD|d6}t||| _2| j*dEdFg dG|d6}t||| _3| j*dHdIg dG|d6}t||| _4g dJ}| j*dKdL||d6} t| || _5| j*dMdN||d6}!t|!|| _6| j*dOdP||d6}"t|"|| _7| j*dQdR||d6}#t|#|| _8| j*dSdT||d6}$t|$|| _9| j*dUdVt+||d6}%t|%|| _:| jrg dW}&| j*dXdY|&|d6}'t|'|| _;| j*dZd[|&|d6}(t|(|| _<| j*d\d]|&|d6})t|)|| _=n	i | _;i | _<i | _=d | _>|j?d urt@| j d^krtABd_ d`| _Cda| _Ddb| _E|j?jF| _G| jdcddde| jC| jD| jEgd	| _>d S d S )fNr   
model_nameenginec                    s   i | ]	}| t |gqS r!   )strr   )r   r!   r#   r     s    z1PrometheusStatLogger.__init__.<locals>.<dictcomp>zvllm:num_requests_runningz.Number of requests in model execution batches.
mostrecentrA   documentationmultiprocess_mode
labelnameszvllm:num_requests_waitingz+Number of requests waiting to be processed.zvllm:engine_sleep_statezEngine sleep state; awake = 0 means engine is sleeping; awake = 1 means engine is awake; weights_offloaded = 1 means sleep level 1; discard_all = 1 means sleep level 2.sleep_state)rA   r   r   r   )awakeweights_offloadeddiscard_allc                    s   i | ]}| j |d qS ))r   r   r   )labelsr   )gauge_engine_sleep_stater   sr!   r#   r     s    zvllm:kv_cache_usage_percz*KV-cache usage. 1 means 100 percent usage.zvllm:corrupted_requestszMCorrupted requests, in terms of total number of requests with NaNs in logits.)rA   r   r   zvllm:prefix_cache_queriesz;Prefix cache queries, in terms of number of queried tokens.zvllm:prefix_cache_hitsz7Prefix cache hits, in terms of number of cached tokens.z"vllm:external_prefix_cache_querieszsExternal prefix cache queries from KV connector cross-instance cache sharing, in terms of number of queried tokens.zvllm:external_prefix_cache_hitszoExternal prefix cache hits from KV connector cross-instance cache sharing, in terms of number of cached tokens.zvllm:mm_cache_queriesz?Multi-modal cache queries, in terms of number of queried items.zvllm:mm_cache_hitsz;Multi-modal cache hits, in terms of number of cached items.zvllm:num_preemptionsz0Cumulative number of preemption from the engine.zvllm:prompt_tokensz#Number of prefill tokens processed.zvllm:prompt_tokens_by_sourcez"Number of prompt tokens by source.sourcec                    s    i | ]}|  t|qS r!   r   r   r   )counter_prompt_tokens_by_sourcer   r   r!   r#   r   [  s    
zvllm:prompt_tokens_cachedz2Number of cached prompt tokens (local + external).zvllm:prompt_tokens_recomputedz4Number of cached tokens recomputed for forward pass.zvllm:generation_tokensz&Number of generation tokens processed.zvllm:request_successz)Count of successfully processed requests.finished_reasonc              	      s$   i | ]}|  t|tqS r!   r   r   )counter_request_success_baser   reasonr!   r#   r     s    zvllm:request_prompt_tokens)rA   r   bucketsr   zvllm:request_generation_tokenszvllm:iteration_tokens_totalz.Histogram of number of tokens per engine_step.)             @         i   i   i   i   i    i @  z&vllm:request_max_num_generation_tokensz;Histogram of maximum number of requested generation tokens.zvllm:request_params_nz%Histogram of the n request parameter.)r         
      zvllm:request_params_max_tokensz.Histogram of the max_tokens request parameter.z vllm:time_to_first_token_secondsz,Histogram of time to first token in seconds.)MbP?{Gzt?{Gz?{Gz?g{Gz?gQ?g{Gz?皙?g      ?      ?      ?      ?      @      @      @      $@      4@      D@      T@g      d@g      @g      @z vllm:inter_token_latency_secondsz,Histogram of inter-token latency in seconds.)r   g?皙?g333333?r   g333333?皙?333333?g?r   r   r   r   r   r   r   r   r   r   z*vllm:request_time_per_output_token_secondsz7Histogram of time_per_output_token_seconds per request.)r   r   g?r   g      ?g       @r   r   r   g      .@r   g      >@r   g      I@g      N@g      ^@g      n@g      ~@g      @g      @g      @z vllm:e2e_request_latency_secondsz,Histogram of e2e request latency in seconds.zvllm:request_queue_time_secondsz5Histogram of time spent in WAITING phase for request.z#vllm:request_inference_time_secondsz5Histogram of time spent in RUNNING phase for request.z!vllm:request_prefill_time_secondsz5Histogram of time spent in PREFILL phase for request.z vllm:request_decode_time_secondsz4Histogram of time spent in DECODE phase for request.z'vllm:request_prefill_kv_computed_tokenszMHistogram of new KV tokens computed during prefill (excluding cached tokens).)r   gMb`?r   r   r   r   r   r   r   r   r   r   r   r      <   x   i,  iX  i  i  zvllm:kv_block_lifetime_secondsz|Histogram of KV cache block lifetime from allocation to eviction. Sampled metrics (controlled by --kv-cache-metrics-sample).z'vllm:kv_block_idle_before_evict_secondszqHistogram of idle time before KV cache block eviction. Sampled metrics (controlled by --kv-cache-metrics-sample).zvllm:kv_block_reuse_gap_secondszHistogram of time gaps between consecutive KV cache block accesses. Only the most recent accesses are recorded (ring buffer). Sampled metrics (controlled by --kv-cache-metrics-sample).r   zfvllm:lora_requests_info prometheus metrics may be incorrect/misleading with data parallel deployments.max_lorawaiting_lora_adaptersrunning_lora_adapterszvllm:lora_requests_infozRunning stats on lora requests.sum)HrD   r   r   rT   show_hidden_metricskv_cache_metricskv_cache_metrics_enabledmodel_configserved_model_namemax_model_len_spec_decoding_clsspeculative_configspec_decoding_prom_kv_connector_clskv_connector_prom
_gauge_clsmake_per_enginegauge_scheduler_runninggauge_scheduler_waitingr   r1   gauge_kv_cache_usager   r   _counter_clscounter_corrupted_requestscounter_prefix_cache_queriescounter_prefix_cache_hits&counter_connector_prefix_cache_queries#counter_connector_prefix_cache_hitscounter_mm_cache_queriescounter_mm_cache_hitscounter_num_preempted_reqscounter_prompt_tokensr   r   ALL_SOURCEScounter_prompt_tokens_cached counter_prompt_tokens_recomputedcounter_generation_tokenscounter_request_successr   _histogram_clsbuild_1_2_5_buckets#histogram_num_prompt_tokens_request'histogram_num_generation_tokens_requesthistogram_iteration_tokens+histogram_max_num_generation_tokens_requesthistogram_n_requesthistogram_max_tokens_requesthistogram_time_to_first_tokenhistogram_inter_token_latency'histogram_request_time_per_output_tokenhistogram_e2e_time_requesthistogram_queue_time_request histogram_inference_time_requesthistogram_prefill_time_requesthistogram_decode_time_request%histogram_prefill_kv_computed_requesthistogram_kv_block_lifetime$histogram_kv_block_idle_before_evicthistogram_kv_block_reuse_gapgauge_lora_infolora_configr   r   r   labelname_max_loralabelname_waiting_lora_adapterslabelname_running_lora_adapters	max_lorasr   )*r"   r   rD   r   r   per_engine_labelvaluesr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r  r  r	  request_latency_bucketsr
  r  r  r  r  r  kv_cache_residency_bucketsr  r  r  r!   )r   r   r   r   r   r   r   r#   r$     s  



			
zPrometheusStatLogger.__init__r<   
config_objc                 C   s   |  }d|d< d\}}|dkrd}d}|d usJ d| | j||d| d	}| jD ]}|  }t||d< |jdi |d
 q-d S )N r   NNr   zvllm:cache_config_infoz(Information of the LLMEngine CacheConfigzUnknown metrics info type r   r   r   r!   )metrics_infor   keysrD   r   r   set)r"   r<   r  r  rA   r   
info_gauger   r!   r!   r#   log_metrics_info  s$   
z%PrometheusStatLogger.log_metrics_infor   r&   r'   r(   r)   c                 C   s  |dur| j | |j | j| |j | j| |j | j| |j	j
 | j| |j	j |jdurL| j| |jj
 | j| |jj |jdurY| j|j| |jdurf| j|j| | jr|jr| j| }| j| }| j| }|jD ]}||j ||j |jD ]}	||	 qq~| jdurd|j  }
d|j!  }| j"|
| j#|| j$| j%i}| jj&di |'  |dur| j(| |j
 | j)| |j |du rdS t*j+r| j,| |j- | j.| |j/ | j0| |j1 |j2}t3j4D ]}| j5| | |6| q| j7| |j8 | j9| |j: | j;| |j< | j=| |j1|j<  |j>D ]}| j?| | q?|j@D ]}| jA| | qN|jBD ]}| jC| | q]|jDD ]}| jE| | ql|jFD ]s}| jG|jH |   | jI| |jJ | jK| |jL | jM| |jN | jO| |jP | jQ| |jR |j1tS|jTd }| jU| | | jV| |j1 | jW| |j< | jX| |jY |jZr| j[| |jZ q{dS )zLog to prometheus.N,r   r!   )\r   r!  r   r   r   r   r   r   incrq   queriesr   hitsrr   r   r   rs   r   rp   rt   r   r   kv_cache_eviction_eventsr  r  r  lifetime_secondsidle_secondsreuse_gaps_secondsr  r   r   r   r   r  r  r  r   r   set_to_current_timer   r   r   r   r   rb   r   rh   r   r`   rf   r   r   r   get_by_sourcer   cached_tokensr   recomputed_tokensr   ra   r  max_num_generation_tokens_iterr  n_params_iterr  time_to_first_tokens_iterr  inter_token_latencies_iterr  finished_requestsr   finish_reasonr
  e2e_latencyr  queued_timer  prefill_timer  inference_timer  decode_timemaxnum_cached_tokensr  r  r  r	  mean_time_per_output_tokenmax_tokens_paramr  )r"   r&   r'   r(   r)   lifetime_hist	idle_hist
reuse_histeventgapr   r   lora_info_labelsptsr   max_gen_tokensn_paramttftitlfinished_requestprefill_kv_computedr!   r!   r#   r+     s  





































zPrometheusStatLogger.recordsleepr0   c                 C   s~   d}d}d}|dkrd}|dkrd}n|dkrd}| j D ] }| jd | | | jd | | | jd | | qd S )Nr   r   r   r   r   r   )rD   r   r!  )r"   rL  r0   r   r   r   r)   r!   r!   r#   r1     s    
z'PrometheusStatLogger.record_sleep_statec                 C   s   |  d| jj d S )Nr   )r#  r   r   r,   r!   r!   r#   r-     s   z+PrometheusStatLogger.log_engine_initializedr    r3   r   r   )r4   r5   r6   r   r   r   r   r   r   r   r   r   r   r   rF   r8   r$   r   r
   r#  r   r   r   r+   r1   r-   r!   r!   r!   r#   r     s>    

    b
 r   
PromMetricmetricengine_idxsr   c                    s    fdd|D S )Nc                    s   i | ]}|  t|qS r!   r   r   rO  r   r!   r#   r     s    z#make_per_engine.<locals>.<dictcomp>r!   )rO  rP  r   r!   rQ  r#   r     s   r   mantissa_lst	max_valuec                 C   sD   d}g }	 | D ]}|d|  }||kr| | q|  S |d7 }q)z
    Builds a list of buckets with increasing powers of 10 multiplied by
    mantissa values until the value exceeds the specified maximum.

    r   Tr   r   )r?   )rR  rS  exponentr   mvaluer!   r!   r#   build_buckets  s   rW  c                 C   s   t g d| S )zR
    Example:
    >>> build_1_2_5_buckets(100)
    [1, 2, 5, 10, 20, 50, 100]
    )r   r   r   )rW  )rS  r!   r!   r#   r     s   r   c                   @   s   e Zd ZdZ					ddedee dB dee dB d	ed
edefddZ			dde
dB dedB dedB dedB fddZddedefddZdd Zdd ZdS ) StatLoggerManagera  
    StatLoggerManager:
        Logging happens at the level of the EngineCore (per scheduler).
         * DP: >1 EngineCore per AsyncLLM - loggers for each EngineCore.
         * With Local Logger, just make N copies for N EngineCores.
         * With Prometheus, we need a single logger with N "labels"

        This class abstracts away this implementation detail from
        the AsyncLLM, allowing the AsyncLLM to just call .record()
        and .log() to a simple interface.
    NTFr   r   rP  custom_stat_loggersenable_default_loggersaggregate_engine_loggingclient_countc                 C   s   |r|ndg| _ g | _g }|d ur|| |r3ttjr3|dkr(td n|r,tnt	}|
| d}	|D ])}
t|
trRt|
trR|
|| j d}t|trQd}	nt|| j |
d}| j
| q7|	so| j
t|| j  d S d S )Nr   r   zfAsyncLLM created with api_server_count more than 1; disabling stats logging to avoid incomplete stats.F)r   rD   T)r   rD   r   )rD   stat_loggersr   r   isEnabledForloggingINFOr   r   rG   r?   r;   r<   r=   r   r   r   )r"   r   rP  rY  rZ  r[  r\  stat_logger_factoriesdefault_logger_factorycustom_prometheus_loggerstat_logger_factoryglobal_stat_loggerr!   r!   r#   r$     sL   	



zStatLoggerManager.__init__r&   r'   r(   r)   c                 C   s.   |d u rd}| j D ]}|j||||d q	d S )Nr   r   )r]  r+   )r"   r&   r'   r(   r)   r   r!   r!   r#   r+     s   
zStatLoggerManager.recordr   rL  r0   c                 C   s   | j D ]}||| qd S r    )r]  r1   )r"   rL  r0   r   r!   r!   r#   r1   $  s   
z$StatLoggerManager.record_sleep_statec                 C      | j D ]}|  qd S r    )r]  r.   )r"   r   r!   r!   r#   r.   (     

zStatLoggerManager.logc                 C   rf  r    )r]  r-   )r"   
agg_loggerr!   r!   r#   r-   ,  rg  z(StatLoggerManager.log_engine_initialized)NNTFr   r  rM  )r4   r5   r6   r7   r   rF   r8   StatLoggerFactoryr   r$   r   r   r   r+   r1   r.   r-   r!   r!   r!   r#   rX    sD    


7
rX  )Br_  rJ   abcr   r   collections.abcr   typingr   prometheus_clientr   r   r   	vllm.envsr   vllm.compilation.cuda_graphr	   vllm.configr
   r   4vllm.distributed.kv_transfer.kv_connector.v1.metricsr   r   vllm.loggerr   vllm.pluginsr   r   vllm.v1.enginer   vllm.v1.metrics.perfr   vllm.v1.metrics.prometheusr   vllm.v1.metrics.statsr   r   r   r   r   vllm.v1.spec_decode.metricsr   r   r4   r   r8   r   r<   AggregateStatLoggerFactoryri  r   rF   rC   r   rG   r   r   r   rN  __annotations__objectdictr   rW  r   rX  r!   r!   r!   r#   <module>   s`   
 ;D(      0

	