o
    ۷i<                     @   s   d dl Z d dlmZ d dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZmZmZmZ eG dd deZd	efd
dZd	efddZd	efddZded	efddZdee dee
 dededee deeef deeee f fddZdS )    N)	dataclass)PreTrainedTokenizerBase)SampleRequest)RequestFuncOutput)"MILLISECONDS_TO_SECONDS_CONVERSIONTERM_PLOTLIB_AVAILABLEBenchmarkMetricsTaskTypec                   @   s   e Zd ZU dZeed< dZeed< dZeed< dZe	e
eef  ed< dZeed< dZeed	< dZeed
< dZeed< dZeed< dZeed< dZe	e
eef  ed< dZeed< dZeed< dZeed< dZe	e
eef  ed< dS )MultiModalsBenchmarkMetrics        mean_audio_ttfp_msmedian_audio_ttfp_msstd_audio_ttfp_msNpercentiles_audio_ttfp_mstotal_audio_duration_sr   total_audio_framesaudio_throughputmean_audio_rtfmedian_audio_rtfstd_audio_rtfpercentiles_audio_rtfmean_audio_duration_smedian_audio_duration_sstd_audio_duration_spercentiles_audio_duration_s)__name__
__module____qualname__r   float__annotations__r   r   r   listtupler   r   intr   r   r   r   r   r   r   r   r    r#   r#   Z/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm_omni/benchmarks/metrics/metrics.pyr
      s    
 r
   metricsc                 C   s  t djdddd t dd|j t dd|j |d ur(t dd	| |td
kr6t dd| t dd| t dd|j |rRt dd|j t|tr`t dd|j	 | t
jksid|v rntd| t| || | t
jkr~t|| t d d S )N{s:{c}^{n}}z Serving Benchmark Result 2   =snc{:<40} {:<10}zSuccessful requests:zFailed requests:zMaximum request concurrency:inf{:<40} {:<10.2f}zRequest rate configured (RPS):zBenchmark duration (s):zRequest throughput (req/s):zRequest goodput (req/s):Peak concurrent requests:e2elz2==================================================)printformat	completedfailedr   request_throughputrequest_goodput
isinstancer
   max_concurrent_requestsr	   
GENERATIONprocess_one_metricprint_text_metricsprint_audio_metrics)	task_typeselected_percentile_metricsmax_concurrencyrequest_ratebenchmark_durationgoodput_config_dictr%   r#   r#   r$   print_metrics   s&   	



rD   c                 C   s   t djdddd t dd|j t|tr<t dd|j t d	d
|j t d	d|j t d	d|j t d	d|j	 | t
jkr^|D ]}|dkrSqL|ds]t|| qLd S d S )Nr&   z Text Result r'   r(   r)   r-   zTotal input tokens:zTotal generated tokens:r/   z Output token throughput (tok/s):z%Peak output token throughput (tok/s):r0   zTotal Token throughput (tok/s):r1   audio)r2   r3   total_inputr8   r
   total_outputoutput_throughputmax_output_tokens_per_sr9   total_token_throughputr	   r:   
startswithr;   )r>   r?   r%   metricr#   r#   r$   r<   <   s"   



r<   c                 C   sl   t djdddd t dd|j t dd	|j t dd
|j | D ]}|dr3t|| q'd S )Nr&   z Audio Result r'   r(   r)   r/   z"Total audio duration generated(s):r-   zTotal audio frames generated:z#Audio throughput(audio duration/s):rE   )r2   r3   r   r   r   rK   r;   )r?   r%   rL   r#   r#   r$   r=   N   s   

r=   metric_attribute_namec                 C   s^  dddddddd}| | | }td	j|d
dd | dk}| dk}d}d}|r-d}d}n|r3d}d}d|  | }t||d}	td|   | ddd|	d d|  | }
t||
d}td|   | ddd|d d|  | }t||g }|D ]*\}}| rtt|nt|}d| d|   | d}t|dd|d qd S )NzTime to First Tokenz'Time per Output Token (excl. 1st token)zInter-token LatencyzEnd-to-end LatencyzTime to First PacketzReal Time FactorzAudio Duration)ttfttpotitlr1   
audio_ttfp	audio_rtfaudio_durationr&   r'   -r)   rR   rS   _msz (ms)_sz (s) mean_r   zMean :z<40 z<10.2fmedian_zMedian percentiles_P)getr2   r3   getattrupper
is_integerstrr"   )rM   r%   metric_header_mapheaderis_audio_rtfis_audio_durationsuffixunit_suffixmean_attr_name
mean_valuemedian_attr_namemedian_valuepercentiles_attr_namepercentiles
percentilevaluep_strlabelr#   r#   r$   r;   X   sB   
&&r;   input_requestsoutputsdur_s	tokenizerselected_percentilesrC   returnc           .         s"  g }d}d}d}g g g }g g g g g  g }t t|D ]}|| jr|| j}|s:t||| jddj}|| || | j7 }d}|dkra|| j|| j	 }||d  }| || || j
7 || j	 t|| dd t|| dd  t|| dd |t|| d	d || j |d7 }q |d q |rg }g }d
|v r| ||d
 t  d|v r| ||d t  d|v r|| ||d t  d|v r| ||d t  t| D ]}tdd t||D }|r|d7 }q|dkr1dPddt_tjddd d}d}dd |D }dd |D }|rtdd |D }tdd |D }tt|| d }t|} t|}!t|D ]g\}}"|"j|"j	 g}#|#d }$|"j
D ]}%|$|%7 }$|#|$ q|#D ] }&t|&| }'d|'  kr|k rn q| |'  d7  < qt|"j| }(t|"j|"j | })t |(|)d D ]}*|!|*  d7  < qːqqt| dkrtt| }tt|!}trddl}+|+ },|,jt t| | dd |,jt t|!|!dd |,!  nt"d t#dQi d|d t|d!|d"t$|d#|| d$|| d%t$|| d&|t$| | d't%pPdd( d)t&p[dd( d*t'pfdd( d+fd,d|D d-t%p{dd( d.t&pdd( d/t'pdd( d0fd1d|D d2t% pdd3t& pdd4t' pdd5 fd6d|D d7t$ d8t$|d9t$ | d:t%pdd;t&pdd<t'pdd=fd>d|D d?t%pdd( d@t&pdd( dAt'pdd( dBfdCd|D dDt%p,dd( dEt&p7dd( dFt'pBdd( dGfdHd|D dIt%pWdd( dJt&pbdd( dKt'pmdd( dLfdMd|D dN|dO|}-t(||||	|
||- |-|fS )Ra  Calculate the metrics for the benchmark.

    Args:
        input_requests: The input requests.
        outputs: The outputs of the requests.
        dur_s: The duration of the benchmark.
        tokenizer: The tokenizer to use.
        selected_percentiles: The percentiles to select.
        goodput_config_dict: The goodput configuration.

    Returns:
        A tuple of the benchmark metrics and the actual output lengths.
    r   F)add_special_tokens   rQ   r   rR   rS   audio_framesrN   
audio_ttftrO   r1   c                 S   s   g | ]\}}||kqS r#   r#   ).0r*   rr#   r#   r$   
<listcomp>   s    z%calculate_metrics.<locals>.<listcomp>Nc                 S   s   | d| d|j  d|  dS )NrY   z: 
)r   )msgcategoryfilenamelinenoliner#   r#   r$   <lambda>   s    z#calculate_metrics.<locals>.<lambda>zYAll requests failed. This is likely due to a misconfiguration on the benchmark arguments.   )
stacklevelc                 S   s   g | ]}|j r|qS r#   successr}   outputr#   r#   r$   r          c                 S   s   g | ]}|j s|qS r#   r   r   r#   r#   r$   r      r   c                 s   s    | ]}|j V  qd S N)
start_timer   r#   r#   r$   	<genexpr>   s    z$calculate_metrics.<locals>.<genexpr>c                 s   s    | ]	}|j |j V  qd S r   )r   latencyr   r#   r#   r$   r      s    zOutput tokens per second)titlezConcurrent requests per secondz8tip: install termplotlib and gnuplot to plot the metricsr4   r5   rF   rG   r6   r7   rH   rJ   mean_ttft_ms  std_ttft_msmedian_ttft_mspercentiles_ttft_msc                    $   g | ]}|t  p
d |d fqS r   r   npro   r}   p)ttftsr#   r$   r   4     $ r   r   r   r   c                    r   r   r   r   )audio_ttfpsr#   r$   r   8  r   r   r   r   r   c                        g | ]}|t  p
d |fqS r   r   r   )rS   r#   r$   r   <       r   r   r   r   r   r   r   c                    r   r   r   r   )
audio_rtfsr#   r$   r   C  r   mean_tpot_msstd_tpot_msmedian_tpot_mspercentiles_tpot_msc                    r   r   r   r   )tpotsr#   r$   r   G  r   mean_itl_ms
std_itl_msmedian_itl_mspercentiles_itl_msc                    r   r   r   r   )itlsr#   r$   r   K  r   mean_e2el_msstd_e2el_msmedian_e2el_mspercentiles_e2el_msc                    r   r   r   r   )e2elsr#   r$   r   O  r   rI   r9   r   r#   ))rangelenr   output_tokensgenerated_text	input_idsappend
prompt_lentext_latencyrN   rP   r_   r   r   zipallwarningsformatwarningwarnminmaxr"   r   ceilzeros	enumerater   r   r   termplotlibfigureplotarangeshowr2   r
   summeanstdmedianrD   ).rs   rt   ru   rv   rw   rC   r>   r?   r@   rA   rB   actual_output_lensrF   r4   good_completed	all_tpotsr{   i
output_lenrO   latency_minus_ttftvalid_metrics
slo_values
req_metricis_good_reqrI   r9   successful_outputsfailed_outputsmin_start_timemax_end_timeduration_secondstokens_per_secondconcurrent_requests_per_secondr   token_timescurrent_time	itl_value
token_timesecond_bucketrequest_start_secondrequest_end_secondsecondtplfigr%   r#   )rS   r   r   r   r   r   r   r$   calculate_metrics   s  















	
 !"#$%&'()+	r   )r   dataclassesr   numpyr   transformersr   vllm.benchmarks.datasetsr   )vllm.benchmarks.lib.endpoint_request_funcr   vllm.benchmarks.server   r   r   r	   r
   rD   r<   r=   rb   r;   r    r   dictr!   r"   r   r#   r#   r#   r$   <module>   sD    


-
