o
    i                     @   s   d dl Z d dlmZmZ d dlZd dlZd dlmZ d dl	m
Z
 e
eZeG dd dZG dd dZG d	d
 d
Zdejdeeee f fddZdS )    N)	dataclassfield)SpeculativeConfig)init_loggerc                   @   s|   e Zd ZU dZeed< dZeed< dZeed< dZeed< e	e
dZe
e ed< eded	d fd
dZdedefddZdS )SpecDecodingStatszPer-step iteration decoding stats from scheduler.

    Each scheduler step, statistics on spec decoding performance are
    aggregated across requests by the scheduler and returned to the
    frontend in EngineCoreOutputs->SchedulerStats.
    num_spec_tokensr   
num_draftsnum_draft_tokensnum_accepted_tokens)default_factorynum_accepted_tokens_per_posreturnc                 C   s   | |dg| dS )Nr   )r   r    )clsr   r   r   Q/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/v1/spec_decode/metrics.pynew   s   zSpecDecodingStats.newc                 C   s\   |  j d7  _ |  j|7  _|  j|7  _|| jksJ t|D ]}| j|  d7  < q d S )N   )r   r	   r
   r   ranger   )selfr	   r
   ir   r   r   observe_draft&   s   zSpecDecodingStats.observe_draftN)__name__
__module____qualname____doc__int__annotations__r   r	   r
   r   listr   classmethodr   r   r   r   r   r   r      s   
 r   c                   @   s<   e Zd ZdZdd Zdd ZdefddZej	fd	d
Z
dS )SpecDecodingLoggingzAggregate and log spec decoding metrics.

    LoggingStatLogger aggregates per-iteration metrics over a set
    time interval using observe() and then logs them using log()
    before resetting to zero.
    c                 C   s   |    d S N)resetr   r   r   r   __init__7   s   zSpecDecodingLogging.__init__c                 C   s&   g | _ g | _g | _g | _t | _d S r    )r   r	   r
   accepted_tokens_per_pos_liststime	monotoniclast_log_timer"   r   r   r   r!   :   s
   zSpecDecodingLogging.resetspec_decoding_statsc                 C   s<   | j |j  | j|j | j|j | j|j d S r    )r   appendr	   r
   r$   r   )r   r(   r   r   r   observeA   s   zSpecDecodingLogging.observec              	   C   s   | j sd S t| j }t| j}t| j}d}d}t | j }|dkr.|| }|| }|dkr8|| d ntd}d||  }	t	| j
}
tj|
dd| }ddd |D }|d	|	|||||| |   d S )
Nr   d   nanr   )axisz, c                 s   s    | ]}|d V  qdS )z.3fNr   ).0pr   r   r   	<genexpr>b   s    z*SpecDecodingLogging.log.<locals>.<genexpr>zSpecDecoding metrics: Mean acceptance length: %.2f, Accepted throughput: %.2f tokens/s, Drafted throughput: %.2f tokens/s, Accepted: %d tokens, Drafted: %d tokens, Per-position acceptance rate: %s, Avg Draft acceptance rate: %.1f%%)r   npsumr	   r
   r%   r&   r'   floatarrayr$   joinr!   )r   log_fnr   r	   r
   draft_throughputaccepted_throughputelapsed_timedraft_acceptance_ratemean_acceptance_length
pos_matrixacceptance_rates	rates_strr   r   r   logI   s<   zSpecDecodingLogging.logN)r   r   r   r   r#   r!   r   r*   loggerinfor?   r   r   r   r   r   /   s    r   c                	   @   sT   e Zd ZdZejZdedB dee	 de
eee f fddZdd	ed
efddZdS )SpecDecodingProma  Record spec decoding metrics in Prometheus.

    The acceptance rate can be calculated using a PromQL query:

      rate(vllm:spec_decode_num_accepted_tokens_total[$interval]) /
      rate(vllm:spec_decode_num_draft_tokens_total[$interval])

    The mean acceptance length (conventionally including bonus tokens)
    can be calculated using:

      1 + (
      rate(vllm:spec_decode_num_accepted_tokens_total[$interval]) /
      rate(vllm:spec_decode_num_drafts[$interval]))

    A per-position acceptance rate vector can be computed using

      vllm:spec_decode_num_accepted_tokens_per_pos[$interval] /
      vllm:spec_decode_num_drafts[$interval]
    speculative_configN
labelnamesper_engine_labelvaluesc                    s   |d u| _ | j s
d S | jdd|d}t||| _| jdd|d}t||| _| jdd|d}t||| _|d us:J | j r@|jnd|d	g }| jd
d|d  fdd| D | _d S )Nzvllm:spec_decode_num_draftszNumber of spec decoding drafts.)namedocumentationrD   z!vllm:spec_decode_num_draft_tokenszNumber of draft tokens.z$vllm:spec_decode_num_accepted_tokenszNumber of accepted tokens.r   positionz,vllm:spec_decode_num_accepted_tokens_per_posz#Accepted tokens per draft position.c                    s*   i | ]\} | fd dt D qS )c                    s$   g | ]} j g t|R  qS r   )labelsstr)r.   pos)base_counterlvr   r   
<listcomp>   s   $ z8SpecDecodingProm.__init__.<locals>.<dictcomp>.<listcomp>)r   )r.   idxrL   r   )rM   r   
<dictcomp>   s    z-SpecDecodingProm.__init__.<locals>.<dictcomp>)	spec_decoding_enabled_counter_clsmake_per_enginecounter_spec_decode_num_drafts$counter_spec_decode_num_draft_tokens'counter_spec_decode_num_accepted_tokensnum_speculative_tokensitems/counter_spec_decode_num_accepted_tokens_per_pos)r   rC   rD   rE   counter_draftscounter_draft_tokenscounter_accepted_tokenspos_labelnamesr   rP   r   r#      sT   

zSpecDecodingProm.__init__r   r(   
engine_idxc                 C   sl   | j sd S | j| |j | j| |j | j| |j t| j	| D ]\}}||j
|  q'd S r    )rR   rU   incr   rV   r	   rW   r
   	enumeraterZ   r   )r   r(   r_   rK   counterr   r   r   r*      s    


zSpecDecodingProm.observe)r   )r   r   r   r   prometheus_clientCounterrS   r   r   rJ   dictr   objectr#   r   r*   r   r   r   r   rB   x   s    
8rB   rb   rE   c                    s    fdd|  D S )z&Create a counter for each label value.c                    s   i | ]
\}}| j | qS r   )rI   )r.   rO   labelvaluesrb   r   r   rQ      s    
z#make_per_engine.<locals>.<dictcomp>)rY   )rb   rE   r   rh   r   rT      s   
rT   )r%   dataclassesr   r   numpyr1   rc   vllm.configr   vllm.loggerr   r   r@   r   r   rB   rd   re   r   r   rf   rT   r   r   r   r   <module>   s    Ia