o
    .i$                  
   @   s
  U d dl Z d dlZd dlmZmZ d dlmZ d dlmZ d dl	m
Z
mZmZ d dlmZ d dlmZ d dlmZmZ d dlmZmZ d d	lmZ d d
lmZmZ d dlmZ d dlmZ d dl m!Z! d dl"m#Z#m$Z$m%Z%m&Z& d dl'm(Z(m)Z) ee*Z+eee,gdf Z-e.d Z/e/e-B Z0G dd deZ1de2e0 fddZ3G dd de1Z4G dd de1Z5G dd de5e4Z6G dd de4Z7G dd de4Z8ee
B eB Z9ee:d< d e9d!e2e, d"e;de<e,e9f fd#d$Z=d%e2e, d&e,de2e, fd'd(Z>d&e,de2e, fd)d*Z?G d+d, d,Z@dS )-    N)ABCabstractmethod)Callable)	TypeAlias)CounterGauge	Histogram)CUDAGraphLogging)SupportsMetricsInfo
VllmConfig)KVConnectorLoggingKVConnectorPrometheus)init_logger)STAT_LOGGER_PLUGINS_GROUPload_plugins_by_group)FinishReason)PerfMetricsLogging)unregister_vllm_metrics)CachingMetricsIterationStatsMultiModalCacheStatsSchedulerStats)SpecDecodingLoggingSpecDecodingPromStatLoggerBaseAggregateStatLoggerBasec                
   @   s~   e Zd ZdZeddedefddZe		ddedB d	e	dB d
e
dB defddZedd Zdd ZdedefddZdS )r   a   Interface for logging metrics.

    API users may define custom loggers that implement this interface.
    However, note that the `SchedulerStats` and `IterationStats` classes
    are not considered stable interfaces and may change in future versions.
    r   vllm_configengine_indexc                 C      d S N )selfr   r   r    r    T/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/v1/metrics/loggers.py__init__/      zStatLoggerBase.__init__Nscheduler_statsiteration_statsmm_cache_stats
engine_idxc                 C   r   r   r    r!   r%   r&   r'   r(   r    r    r"   record2   s   zStatLoggerBase.recordc                 C   r   r   r    r!   r    r    r"   log_engine_initialized;   r$   z%StatLoggerBase.log_engine_initializedc                 C   r   r   r    r+   r    r    r"   log>   r$   zStatLoggerBase.logis_awakelevelc                 C   r   r   r    )r!   r.   r/   r    r    r"   record_sleep_stateA   r$   z!StatLoggerBase.record_sleep_stater   Nr   )__name__
__module____qualname____doc__r   r   intr#   r   r   r   r*   r,   r-   r0   r    r    r    r"   r   '   s(    
returnc                  C   sR   g } t t D ]\}}t|trt|ts!td|d|d| | q| S )NzStat logger plugin z+ must be a subclass of StatLoggerBase (got z).)	r   r   items
isinstancetype
issubclassr   	TypeErrorappend)	factoriesnameplugin_classr    r    r"   !load_stat_logger_plugin_factoriesE   s   rB   c                   @   s*   e Zd ZdZededee fddZdS )r   zNAbstract base class for loggers that
    aggregate across multiple DP engines.r   engine_indexesc                 C   r   r   r    r!   r   rC   r    r    r"   r#   Z   r$   z AggregateStatLoggerBase.__init__N)	r3   r4   r5   r6   r   r   listr7   r#   r    r    r    r"   r   V   s    c                	   @   s   e Zd Zd"dedefddZdd Zdefd	d
Zde	fddZ
dededefddZedd Z		d#dedB de	dB dedB defddZdd Zdd Zdd Zd d! ZdS )$LoggingStatLoggerr   r   r   c                 C   s   || _ || _| t  t | _t | _t | _	t | _
t | _| jj}t|| _d | _| jjjr>t| jjj| jjj| _d| _d| _d| _d| _|  rUt|| _d S d S )N        F)r   r   _resettime	monotonicr   last_scheduler_statsr   prefix_caching_metrics connector_prefix_caching_metricsmm_caching_metricsr   spec_decoding_loggingkv_transfer_configr   kv_connector_loggingcudagraph_loggingobservability_configcudagraph_metricsr	   compilation_configcudagraph_modecudagraph_capture_sizeslast_prompt_throughputlast_generation_throughputengine_is_idle
aggregated_enable_perf_statsr   perf_metrics_logging)r!   r   r   rP   r    r    r"   r#   _   s.   

zLoggingStatLogger.__init__c                 C   s"   || _ d| _d| _d| _d| _d S r2   )last_log_timenum_prompt_tokensnum_generation_tokensnum_corrupted_reqsnum_preemptions)r!   nowr    r    r"   rH   }   s
   
zLoggingStatLogger._resetr8   c                 C   s
   | j jjS r   )r   rS   enable_mfu_metricsr+   r    r    r"   r\      s   
z$LoggingStatLogger._enable_perf_statsr&   c                 C   sD   |  j |j 7  _ |  j|j7  _|  j|j7  _|  j|j7  _d S r   )r_   r`   ra   rb   num_preempted_reqs)r!   r&   r    r    r"   _track_iteration_stats   s   z(LoggingStatLogger._track_iteration_statstracked_statsrc   c                 C   s"   || j  }|dkrdS t|| S )NrG   )r^   float)r!   rg   rc   
delta_timer    r    r"   _get_throughput   s   
z!LoggingStatLogger._get_throughputc                 C   s   d | jS )NzEngine {:03d}: )formatr   r+   r    r    r"   
log_prefix   s   zLoggingStatLogger.log_prefixNr%   r'   r(   c                 C   s   |r|  | |dur[| j|j |jdur| j|j |jdur*| j|j |j }r5| j	| | j
durF|jdurF| j
|j | jsL|| _|j }r[|  r[| j| |re| j| dS dS )zLog Stats to standard output.N)rf   rL   observeprefix_cache_statsconnector_prefix_cache_statsrM   spec_decoding_statsrO   kv_connector_statsrQ   rR   cudagraph_statsr[   rK   
perf_statsr\   r]   rN   )r!   r%   r&   r'   r(   rq   rs   r    r    r"   r*      s,   





zLoggingStatLogger.recordc                 C   sV   t  }| | j|}| | j|}| | t||| j| jf | _	|| _|| _d S r   )
rI   rJ   rj   r_   r`   rH   anyrX   rY   rZ   )r!   rc   prompt_throughputgeneration_throughputr    r    r"   _update_stats   s   

zLoggingStatLogger._update_statsc                 C   r   r   r    r+   r    r    r"   aggregate_scheduler_stats      z+LoggingStatLogger.aggregate_scheduler_statsc                 C   sd  |    |   | jrtjntj}g d}| j| j| jj	| jj
g}| jdkr1|d || j |ddg || jjd | jjd g tjrU|d || j | jjsg|d || jjd  | jjsy|d	 || jjd  || jd
| g|R   | jj|d | jj|d | jd ur| jj|d |  r| jj|| jd d S d S )N)z$Avg prompt throughput: %.1f tokens/sz(Avg generation throughput: %.1f tokens/szRunning: %d reqszWaiting: %d reqsr   zPreemptions: %dzGPU KV cache usage: %.1f%%zPrefix cache hit rate: %.1f%%d   zCorrupted: %d reqsz&External prefix cache hit rate: %.1f%%zMM cache hit rate: %.1f%%z, )log_fn)r{   rl   )rw   rx   rZ   loggerdebuginforX   rY   rK   num_running_reqsnum_waiting_reqsrb   r>   extendkv_cache_usagerL   hit_rateenvsVLLM_COMPUTE_NANS_IN_LOGITSra   rM   emptyrN   rl   joinrO   r-   rQ   rR   r\   r]   )r!   r{   	log_partslog_argsr    r    r"   r-      sV   







zLoggingStatLogger.logc                 C   s(   | j jjrtd| j| j jj d S d S )NzSEngine %03d: vllm cache_config_info with initialization after num_gpu_blocks is: %d)r   cache_confignum_gpu_blocksr|   r}   r   r+   r    r    r"   r,     s   
z(LoggingStatLogger.log_engine_initializedr1   r2   )r3   r4   r5   r   r7   r#   rH   boolr\   r   rf   rh   rj   propertyrl   r   r   r*   rw   rx   r-   r,   r    r    r    r"   rF   ^   s.    	

#:rF   c                	   @   s   e Zd Zdedee fddZedd Zde	fdd	Z
	
	dded
B ded
B ded
B defddZdd Zdd Zdd Zd
S )AggregatedLoggingStatLoggerr   rC   c                 C   s2   || _ dd | j D | _tj| |dd d| _d S )Nc                 S   s   i | ]}|t  qS r    )r   .0idxr    r    r"   
<dictcomp>  s    z8AggregatedLoggingStatLogger.__init__.<locals>.<dictcomp>)r   T)rC   last_scheduler_stats_dictrF   r#   r[   rD   r    r    r"   r#     s   
z$AggregatedLoggingStatLogger.__init__c                 C   s   d t| jS )Nz{} Engines Aggregated: )rk   lenrC   r+   r    r    r"   rl   $  s   z&AggregatedLoggingStatLogger.log_prefixr8   c                 C   s   dS )NFr    r+   r    r    r"   r\   (  ry   z.AggregatedLoggingStatLogger._enable_perf_statsNr   r%   r&   r'   r(   c                 C   sH   || j vrtd| d S tj| ||||d |d ur"|| j|< d S d S NzUnexpected engine_idx: %dr'   r(   )rC   r|   warningrF   r*   r   r)   r    r    r"   r*   ,  s   
z"AggregatedLoggingStatLogger.recordc                 C   sh   t  | _| j D ]}| j j|j7  _| j j|j7  _| j j|j7  _q	| j jt| j  _d S r   )r   rK   r   valuesr   r   r   r   )r!   rK   r    r    r"   rx   @  s   z5AggregatedLoggingStatLogger.aggregate_scheduler_statsc                 C   s   t |  d S r   )rF   r-   r+   r    r    r"   r-   N  s   zAggregatedLoggingStatLogger.logc                 C   s,   | j jjrtdt| j| j jj d S d S )NzR%d Engines: vllm cache_config_info with initialization after num_gpu_blocks is: %d)r   r   r   r|   r~   r   rC   r+   r    r    r"   r,   Q  s   
z2AggregatedLoggingStatLogger.log_engine_initializedr2   )r3   r4   r5   r   rE   r7   r#   r   rl   r   r\   r   r   r   r*   rx   r-   r,   r    r    r    r"   r     s.    


r   c                	   @   sf   e Zd Zdedee deddfddZ		dd	edB d
e	dB de
dB defddZdd Zdd ZdS )PerEngineStatLoggerAdapterr   rC   per_engine_stat_logger_factoryr8   Nc                 C   s*   i | _ || _|D ]
}|||| j |< qd S r   )per_engine_stat_loggersrC   )r!   r   rC   r   r   r    r    r"   r#   \  s   z#PerEngineStatLoggerAdapter.__init__r   r%   r&   r'   r(   c                 C   s6   || j vrtd| d S | j | j||||d d S r   )r   r|   r   r*   r)   r    r    r"   r*   i  s   


z!PerEngineStatLoggerAdapter.recordc                 C      | j  D ]}|  qd S r   )r   r   r-   r!   per_engine_stat_loggerr    r    r"   r-   z     
zPerEngineStatLoggerAdapter.logc                 C   r   r   )r   r   r,   r   r    r    r"   r,   ~  r   z1PerEngineStatLoggerAdapter.log_engine_initializedr2   )r3   r4   r5   r   rE   r7   PerEngineStatLoggerFactoryr#   r   r   r   r*   r-   r,   r    r    r    r"   r   [  s.    

r   c                	   @   s   e Zd ZeZeZeZe	Z
eZ	ddedee dB fddZdedefdd	Z		
ddedB dedB dedB defddZddedefddZdd ZdS )PrometheusStatLoggerNr   rC   c           (         s  |d u rdg}|| _ t  || _|jj| _|jj| _ddg}|jj|jj	}fdd|D }| 
|j||| _| |||| _| jddd|d	}t||| _| jd
dd|d	}t||| _| jdd|dg ddi | _g d}|D ]fdd|D | j< qu|   | jddd|d	}	t|	|| _tjr| jdd|d}
t|
|| _| jdd|d}t||| _| jdd|d}t||| _| jdd|d}t||| _| jdd|d}t||| _| jdd |d}t||| _| jd!d"|d}t||| _| jd#d$|d}t||| _ | jd%d&|d}t||| _!| jd'd(|d}t||| _"i | _#| jd)d*|d+g d t$D ] fd,d|D | j#< qC| j%d-d&t&||d.}t||| _'| j%d/d(t&||d.}t||| _(| j%d0d1g d2|d.}t||| _)| j%d3d4t&||d.}t||| _*| j%d5d6g d7|d.}t||| _+| j%d8d9t&||d.}t||| _,| j%d:d;g d<|d.}t||| _-| j%d=d>g d?|d.}t||| _.| j%d@dAg d?|d.}t||| _/g dB}| j%dCdD||d.}t||| _0| j%dEdF||d.}t||| _1| j%dGdH||d.} t| || _2| j%dIdJ||d.}!t|!|| _3| j%dKdL||d.}"t|"|| _4| j%dMdNt&||d.}#t|#|| _5| jrg dO}$| j%dPdQ|$|d.}%t|%|| _6| j%dRdS|$|d.}&t|&|| _7| j%dTdU|$|d.}'t|'|| _8n	i | _6i | _7i | _8d | _9|j:d urt;| j dVkrt<=dW dX| _>dY| _?dZ| _@|j:jA| _B| jd[d\d]| j>| j?| j@gd	| _9d S d S )^Nr   
model_nameenginec                    s   i | ]	}| t |gqS r    )strr   )r   r    r"   r     s    z1PrometheusStatLogger.__init__.<locals>.<dictcomp>zvllm:num_requests_runningz.Number of requests in model execution batches.
mostrecentr@   documentationmultiprocess_mode
labelnameszvllm:num_requests_waitingz+Number of requests waiting to be processed.zvllm:engine_sleep_statezEngine sleep state; awake = 0 means engine is sleeping; awake = 1 means engine is awake; weights_offloaded = 1 means sleep level 1; discard_all = 1 means sleep level 2.sleep_state)r@   r   r   r   )awakeweights_offloadeddiscard_allc                    s   i | ]}| j |d qS ))r   r   r   )labelsr   )gauge_engine_sleep_stater   sr    r"   r     s    zvllm:kv_cache_usage_percz*KV-cache usage. 1 means 100 percent usage.zvllm:corrupted_requestszMCorrupted requests, in terms of total number of requests with NaNs in logits.)r@   r   r   zvllm:prefix_cache_queriesz;Prefix cache queries, in terms of number of queried tokens.zvllm:prefix_cache_hitsz7Prefix cache hits, in terms of number of cached tokens.z"vllm:external_prefix_cache_querieszsExternal prefix cache queries from KV connector cross-instance cache sharing, in terms of number of queried tokens.zvllm:external_prefix_cache_hitszoExternal prefix cache hits from KV connector cross-instance cache sharing, in terms of number of cached tokens.zvllm:mm_cache_queriesz?Multi-modal cache queries, in terms of number of queried items.zvllm:mm_cache_hitsz;Multi-modal cache hits, in terms of number of cached items.zvllm:num_preemptionsz0Cumulative number of preemption from the engine.zvllm:prompt_tokensz#Number of prefill tokens processed.zvllm:generation_tokensz&Number of generation tokens processed.zvllm:request_successz)Count of successfully processed requests.finished_reasonc              	      s$   i | ]}|  t|tqS r    r   r   r   )counter_request_success_baser   reasonr    r"   r   a  s    zvllm:request_prompt_tokens)r@   r   bucketsr   zvllm:request_generation_tokenszvllm:iteration_tokens_totalz.Histogram of number of tokens per engine_step.)             @         i   i   i   i   i    i @  z&vllm:request_max_num_generation_tokensz;Histogram of maximum number of requested generation tokens.zvllm:request_params_nz%Histogram of the n request parameter.)r         
      zvllm:request_params_max_tokensz.Histogram of the max_tokens request parameter.z vllm:time_to_first_token_secondsz,Histogram of time to first token in seconds.)MbP?{Gzt?{Gz?{Gz?g{Gz?gQ?g{Gz?皙?g      ?      ?      ?      ?      @      @      @      $@      4@      D@      T@g      d@g      @g      @z vllm:inter_token_latency_secondsz,Histogram of inter-token latency in seconds.)r   g?皙?g333333?r   g333333?皙?333333?g?r   r   r   r   r   r   r   r   r   r   z*vllm:request_time_per_output_token_secondsz7Histogram of time_per_output_token_seconds per request.)r   r   g?r   g      ?g       @r   r   r   g      .@r   g      >@r   g      I@g      N@g      ^@g      n@g      ~@g      @g      @g      @z vllm:e2e_request_latency_secondsz,Histogram of e2e request latency in seconds.zvllm:request_queue_time_secondsz5Histogram of time spent in WAITING phase for request.z#vllm:request_inference_time_secondsz5Histogram of time spent in RUNNING phase for request.z!vllm:request_prefill_time_secondsz5Histogram of time spent in PREFILL phase for request.z vllm:request_decode_time_secondsz4Histogram of time spent in DECODE phase for request.z'vllm:request_prefill_kv_computed_tokenszMHistogram of new KV tokens computed during prefill (excluding cached tokens).)r   gMb`?r   r   r   r   r   r   r   r   r   r   r   r      <   x   i,  iX  i  i  zvllm:kv_block_lifetime_secondsz|Histogram of KV cache block lifetime from allocation to eviction. Sampled metrics (controlled by --kv-cache-metrics-sample).z'vllm:kv_block_idle_before_evict_secondszqHistogram of idle time before KV cache block eviction. Sampled metrics (controlled by --kv-cache-metrics-sample).zvllm:kv_block_reuse_gap_secondszHistogram of time gaps between consecutive KV cache block accesses. Only the most recent accesses are recorded (ring buffer). Sampled metrics (controlled by --kv-cache-metrics-sample).r   zfvllm:lora_requests_info prometheus metrics may be incorrect/misleading with data parallel deployments.max_lorawaiting_lora_adaptersrunning_lora_adapterszvllm:lora_requests_infozRunning stats on lora requests.sum)CrC   r   r   rS   show_hidden_metricskv_cache_metricskv_cache_metrics_enabledmodel_configserved_model_namemax_model_len_spec_decoding_clsspeculative_configspec_decoding_prom_kv_connector_clskv_connector_prom
_gauge_clsmake_per_enginegauge_scheduler_runninggauge_scheduler_waitingr   r0   gauge_kv_cache_usager   r   _counter_clscounter_corrupted_requestscounter_prefix_cache_queriescounter_prefix_cache_hits&counter_connector_prefix_cache_queries#counter_connector_prefix_cache_hitscounter_mm_cache_queriescounter_mm_cache_hitscounter_num_preempted_reqscounter_prompt_tokenscounter_generation_tokenscounter_request_successr   _histogram_clsbuild_1_2_5_buckets#histogram_num_prompt_tokens_request'histogram_num_generation_tokens_requesthistogram_iteration_tokens+histogram_max_num_generation_tokens_requesthistogram_n_requesthistogram_max_tokens_requesthistogram_time_to_first_tokenhistogram_inter_token_latency'histogram_request_time_per_output_tokenhistogram_e2e_time_requesthistogram_queue_time_request histogram_inference_time_requesthistogram_prefill_time_requesthistogram_decode_time_request%histogram_prefill_kv_computed_requesthistogram_kv_block_lifetime$histogram_kv_block_idle_before_evicthistogram_kv_block_reuse_gapgauge_lora_infolora_configr   r|   r   labelname_max_loralabelname_waiting_lora_adapterslabelname_running_lora_adapters	max_lorasr   )(r!   r   rC   r   r   per_engine_labelvaluesr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  request_latency_bucketsr  r  r  r  r  r  kv_cache_residency_bucketsr  r	  r
  r    )r   r   r   r   r   r"   r#     s  


			
zPrometheusStatLogger.__init__r;   
config_objc                 C   s   |  }d|d< d\}}|dkrd}d}|d usJ d| | j||d| d	}| jD ]}|  }t||d< |jdi |d
 q-d S )N r   NNr   zvllm:cache_config_infoz(Information of the LLMEngine CacheConfigzUnknown metrics info type r   r   r   r    )metrics_infor   keysrC   r   r   set)r!   r;   r  r  r@   r   
info_gauger   r    r    r"   log_metrics_info  s$   
z%PrometheusStatLogger.log_metrics_infor   r%   r&   r'   r(   c                 C   s  |dur| j | |j | j| |j | j| |j | j| |j	j
 | j| |j	j |jdurL| j| |jj
 | j| |jj |jdurY| j|j| |jdurf| j|j| | jr|jr| j| }| j| }| j| }|jD ]}||j ||j |jD ]}	||	 qq~| jdurd|j  }
d|j!  }| j"|
| j#|| j$| j%i}| jj&di |'  |dur| j(| |j
 | j)| |j |du rdS t*j+r| j,| |j- | j.| |j/ | j0| |j1 | j2| |j3 | j4| |j1|j3  |j5D ]}| j6| | q|j7D ]}| j8| | q%|j9D ]}| j:| | q4|j;D ]}| j<| | qC|j=D ]s}| j>|j? |   | j@| |jA | jB| |jC | jD| |jE | jF| |jG | jH| |jI |j1tJ|jKd }| jL| | | jM| |j1 | jN| |j3 | jO| |jP |jQr| jR| |jQ qRdS )zLog to prometheus.N,r   r    )Sr   r  r   r   r   r   r   r   incrn   queriesr   hitsro   r   r   rp   r   rm   rq   r   r   kv_cache_eviction_eventsr  r	  r
  lifetime_secondsidle_secondsreuse_gaps_secondsr  r   r   r  r   r  r  r  r   r   set_to_current_timer   r   r   r   r   ra   r   re   r   r_   r   r`   r   max_num_generation_tokens_iterr   n_params_iterr   time_to_first_tokens_iterr   inter_token_latencies_iterr   finished_requestsr   finish_reasonr  e2e_latencyr  queued_timer  prefill_timer  inference_timer  decode_timemaxnum_cached_tokensr  r   r   r  mean_time_per_output_tokenmax_tokens_paramr   )r!   r%   r&   r'   r(   lifetime_hist	idle_hist
reuse_histeventgapr   r   lora_info_labelsmax_gen_tokensn_paramttftitlfinished_requestprefill_kv_computedr    r    r"   r*     s   




































zPrometheusStatLogger.recordsleepr/   c                 C   s~   d}d}d}|dkrd}|dkrd}n|dkrd}| j D ] }| jd | | | jd | | | jd | | qd S )Nr   r   r   r   r   r   )rC   r   r  )r!   r@  r/   r   r   r   r(   r    r    r"   r0   k  s    
z'PrometheusStatLogger.record_sleep_statec                 C   s   |  d| jj d S )Nr   )r  r   r   r+   r    r    r"   r,   ~  s   z+PrometheusStatLogger.log_engine_initializedr   r2   r   r   )r3   r4   r5   r   r   r   r   r   r   r   r   r   r   r   rE   r7   r#   r   r
   r  r   r   r   r*   r0   r,   r    r    r    r"   r     s>    

    ?
 r   
PromMetricmetricengine_idxsr   c                    s    fdd|D S )Nc                    s   i | ]}|  t|qS r    r   r   rC  r   r    r"   r     s    z#make_per_engine.<locals>.<dictcomp>r    )rC  rD  r   r    rE  r"   r     s   r   mantissa_lst	max_valuec                 C   sD   d}g }	 | D ]}|d|  }||kr| | q|  S |d7 }q)z
    Builds a list of buckets with increasing powers of 10 multiplied by
    mantissa values until the value exceeds the specified maximum.

    r   Tr   r   )r>   )rF  rG  exponentr   mvaluer    r    r"   build_buckets  s   rK  c                 C   s   t g d| S )zR
    Example:
    >>> build_1_2_5_buckets(100)
    [1, 2, 5, 10, 20, 50, 100]
    )r   r   r   )rK  )rG  r    r    r"   r     s   r   c                   @   s   e Zd ZdZ					ddedee dB dee dB d	ed
edefddZ			dde
dB dedB dedB dedB fddZddedefddZdd Zdd ZdS ) StatLoggerManagera  
    StatLoggerManager:
        Logging happens at the level of the EngineCore (per scheduler).
         * DP: >1 EngineCore per AsyncLLM - loggers for each EngineCore.
         * With Local Logger, just make N copies for N EngineCores.
         * With Prometheus, we need a single logger with N "labels"

        This class abstracts away this implementation detail from
        the AsyncLLM, allowing the AsyncLLM to just call .record()
        and .log() to a simple interface.
    NTFr   r   rD  custom_stat_loggersenable_default_loggersaggregate_engine_loggingclient_countc                 C   s   |r|ndg| _ g | _g }|d ur|| |r3ttjr3|dkr(td n|r,tnt	}|
| d}	|D ])}
t|
trRt|
trR|
|| j d}t|trQd}	nt|| j |
d}| j
| q7|	so| j
t|| j  d S d S )Nr   r   zfAsyncLLM created with api_server_count more than 1; disabling stats logging to avoid incomplete stats.F)r   rC   T)r   rC   r   )rC   stat_loggersr   r|   isEnabledForloggingINFOr   r   rF   r>   r:   r;   r<   r   r   r   )r!   r   rD  rM  rN  rO  rP  stat_logger_factoriesdefault_logger_factorycustom_prometheus_loggerstat_logger_factoryglobal_stat_loggerr    r    r"   r#     sL   	



zStatLoggerManager.__init__r%   r&   r'   r(   c                 C   s.   |d u rd}| j D ]}|j||||d q	d S )Nr   r   )rQ  r*   )r!   r%   r&   r'   r(   r|   r    r    r"   r*     s   
zStatLoggerManager.recordr   r@  r/   c                 C   s   | j D ]}||| qd S r   )rQ  r0   )r!   r@  r/   r|   r    r    r"   r0     s   
z$StatLoggerManager.record_sleep_statec                 C      | j D ]}|  qd S r   )rQ  r-   )r!   r|   r    r    r"   r-        

zStatLoggerManager.logc                 C   rZ  r   )rQ  r,   )r!   
agg_loggerr    r    r"   r,     r[  z(StatLoggerManager.log_engine_initialized)NNTFr   r  rA  )r3   r4   r5   r6   r   rE   r7   StatLoggerFactoryr   r#   r   r   r   r*   r0   r-   r,   r    r    r    r"   rL    sD    


7
rL  )ArS  rI   abcr   r   collections.abcr   typingr   prometheus_clientr   r   r   	vllm.envsr   vllm.compilation.cuda_graphr	   vllm.configr
   r   4vllm.distributed.kv_transfer.kv_connector.v1.metricsr   r   vllm.loggerr   vllm.pluginsr   r   vllm.v1.enginer   vllm.v1.metrics.perfr   vllm.v1.metrics.prometheusr   vllm.v1.metrics.statsr   r   r   r   vllm.v1.spec_decode.metricsr   r   r3   r|   r7   r   r;   AggregateStatLoggerFactoryr]  r   rE   rB   r   rF   r   r   r   rB  __annotations__objectdictr   rK  r   rL  r    r    r    r"   <module>   s`   
 :D(      

	