o
    
۾i9                  
   @   s  d Z ddlZddlZddlZddlZddlmZ ddlmZ ddlZ	ddl
mZmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ zddlZW n ey[   edZY nw 	d"dededeeee f fddZdeeee f dee deeeeef f fddZ dd Z!dej"deeef fddZ#dej$ddfddZ%dej"ddfddZ&e'dkrej$d d!Z(e%e( e() Z*e&e* dS dS )#a?  Benchmark multimodal processor latency.

This benchmark measures the latency of the mm processor module
using multimodal prompts from datasets.
MM processor stats are automatically enabled.

Run:
    vllm bench mm-processor \
        --model <your_model> \
        --dataset-name random-mm \
        --num-prompts 10 \
    N)datetime)Any)MultiModalConversationDatasetVisionArenaDataset)get_requests
EngineArgs)#get_timing_stats_from_engine_client)freeze_gc_heap)PlaceholderModulepandas
llm_enginenum_warmup_reqsreturnc                 C   sd   t | }g d}dd |D }t| |d }|D ]}|D ]}||v r.|| ||  qq|S )z
    Collect multimodal processor timing stats.
    Returns a dictionary mapping stage names to lists of timing values (in seconds).
    )hf_processor_timehashing_timecache_lookup_timeprompt_update_timepreprocessor_total_timeencoder_forward_timenum_encoder_callsc                 S   s   i | ]}|g qS  r   ).0keyr   r   P/home/ubuntu/.local/lib/python3.10/site-packages/vllm/benchmarks/mm_processor.py
<dictcomp>>       z.collect_mm_processor_stats.<locals>.<dictcomp>N)r	   listvaluesappend)r   r   	all_stats	stat_keysstats_by_stage
stats_list
stats_dictr   r   r   r   collect_mm_processor_stats+   s   	r%   r"   selected_percentilesc                    s   i }|   D ]E\}}|sdddddd |D ||< q|dk}|r$|ndd |D  tt tt tt d fdd|D ||< q|S )	z:
    Calculate aggregate metrics from stats by stage.
            )meanmedianstdc                 S   s   i | ]}d | dqS )pr'   r   r   r+   r   r   r   r   Z   s    z2calculate_mm_processor_metrics.<locals>.<dictcomp>r   c                 S   s   g | ]}|d  qS )  r   )r   tr   r   r   
<listcomp>_       z2calculate_mm_processor_metrics.<locals>.<listcomp>c                    s$   i | ]}d | t t |qS r+   floatnp
percentiler,   r   r   r   r   e   s   $ )itemsr3   r4   r(   r)   r*   )r"   r&   metrics
stage_nametimesis_count_metricr   r6   r   calculate_mm_processor_metricsK   s&   
r<   c                 C   s   t | dds
| j| _t| dsd| _t| dsd| _t| ds"d| _| jdkr.| js.td| jdkrLt	j
 tj
B }| j|vrNt| j dt| dS dS )	zE
    Validate command-line arguments for mm_processor benchmark.
    	tokenizerNdataset_path	lora_path	max_lorashfz--dataset-path is required when using --dataset-name hf. For multimodal benchmarking, specify a dataset like 'lmarena-ai/VisionArena-Chat'.zK is not a supported multimodal dataset. Supported multimodal datasets are: )getattrmodelr=   hasattrr>   r?   r@   dataset_name
ValueErrorr   SUPPORTED_DATASET_PATHSkeysr   sorted)argssupported_mm_datasetsr   r   r   validate_argsk   s2   




rL   rJ   c           #   	      sX  ddl m}m  t|  | jdu rd| _t| }|d'i t|	 }t
| |}tfdd|D s;J ddd |D }d	d |D } fd
d|D }dd t| dddD }t  t| dd}	|	dkrtd|	 d tjd'i t| }
|	|
_|
 jd7  _t
|
|}dd |D }dd |D } fdd|D }j||t| dd d tdt| d t }j||t| dd d}t }|| }tj|	}t| std t||}tdd |D }t|| }g |D ]@}|jr	|jdu r
q|j}t|dddur=t|dddur=t|dddur=|j}t d|j!|j" }#|| d   qsU|dkrUtd! || }|d  g| rwt$t%&}t$t%'}t$t%(}fd"d|D }nd}d}d}d#d |D }i } d$|v r|d$ r|d$ }!t)t*|!t|!d%} |||||||| d&}"|"S )(z1
    Run the multimodal processor benchmark.
    r   )LLMSamplingParamsNc                 3   s&    | ]} j jj|j|j kV  qd S N)r   model_configmax_model_len
prompt_lenexpected_output_lenr   request)llmr   r   	<genexpr>   s    

z1benchmark_multimodal_processor.<locals>.<genexpr>zpPlease ensure that max_model_len is greater than the sum of prompt_len and expected_output_len for all requests.c                 S      g | ]}|j qS r   promptrT   r   r   r   r/      r   z2benchmark_multimodal_processor.<locals>.<listcomp>c                 S   rX   r   rS   rT   r   r   r   r/      r   c                    s   g | ]
} d d|ddqS )   r'   T)ntemperature
max_tokens
detokenizer   r   
output_lenrN   r   r   r/      s    c                 S      g | ]}t |qS r   r3   r,   r   r   r   r/          metric_percentiles99,num_warmupszProcessing z warmup requests...r\   c                 S   rX   r   rY   r   reqr   r   r   r/      r   c                 S   rX   r   r[   rk   r   r   r   r/      r   c                    s   g | ]} |d qS ))r_   r   ra   rc   r   r   r/      s    
disable_tqdmF)use_tqdmz requests...u   
⚠️  Warning: No MM processor stats found in registry.
   This may indicate that:
   - No multimodal requests were processed
   - Stats were already retrieved (registry is cleared after retrieval)
c                 S   s   g | ]}|j r|qS r   )finished)r   or   r   r   r/      s    first_token_latencylast_token_tsfirst_token_tsr'   r-   u   
⚠️  Warning: Detailed end-to-end latency metrics not available.
   Falling back to average request latency (total_time / num_completed_requests).
c                    s    g | ]}|t t |fqS r   r2   r,   )
e2el_timesr   r   r/     s    c                 S   s   g | ]}|d fqS )r'   r   r,   r   r   r   r/     r0   r   )total_encoder_callsnum_requests_with_encoder_calls)	completedfailedmean_e2el_msmedian_e2el_msstd_e2el_mspercentiles_e2el_msmm_processor_statsencoder_summaryr   )+vllmrM   rN   rL   seedr   from_cli_argsdataclassesasdictget_tokenizerr   allrB   splitr
   printargparse	Namespacevarsnum_promptschatlentimeperf_counterr%   r   anyr   r<   ro   r8   rq   maxrr   rs   r   r3   r4   r(   r)   r*   intsum)#rJ   rM   engine_argsr=   requestspromptsexpected_output_lenssampling_paramsr&   rj   warmup_argswarmup_requestswarmup_promptswarmup_output_lenswarmup_sampling_params
start_timeoutputsend_time
total_timemm_stats_by_stagemm_processor_metricsrw   rx   outputr8   ttftdecode_timeavg_time_per_requestry   rz   r{   r|   r~   encoder_callsbenchmark_resultr   )rN   rt   rV   r   benchmark_multimodal_processor   s   



	






r   parserc                 C   s   ddl m} ||  | jdd | jdtdddgdd	 | jd
tddd | jdtddd ddlm}m	} ||  ||  | jdtddd | jdtddd | jdtddd | jdtddd | jdtddd | jdtddd | jd d!d"d# dS )$z9Add CLI arguments for the multimodal processor benchmark.r   r   T)enable_mm_processor_statsz--dataset-namez	random-mmrA   z=Name of the dataset to benchmark on. Defaults to 'random-mm'.)typedefaultchoiceshelpz--num-prompts
   zNumber of prompts to process.)r   r   r   z--num-warmupsr\   z$Number of warmup prompts to process.)add_random_dataset_base_args"add_random_multimodal_dataset_argsz--dataset-pathNzlPath to the dataset file or HuggingFace dataset name (e.g., 'yale-nlp/MMVU', 'lmarena-ai/VisionArena-Chat').z--hf-subsetz-Subset of the HuggingFace dataset (optional).z
--hf-splitzGSplit of the HuggingFace dataset (e.g., 'train', 'test', 'validation').z--output-lenzVOutput length for each request. Overrides the default output lengths from the dataset.z--output-jsonz2Path to save the benchmark results in JSON format.z--metric-percentilesrh   zDComma-separated list of percentiles to calculate (e.g., '50,90,99').z--disable-tqdm
store_truezDisable tqdm progress bar.)actionr   )
vllm.engine.arg_utilsr   add_cli_argsset_defaultsadd_argumentstrr   vllm.benchmarks.datasetsr   r   )r   r   r   r   r   r   r   r   &  s   

r   c                    s  t d t| }t d t d t d d|v rt d dd t| d	d
dD }g }|d  D ]=\}}|dk}|r>dnd}|| |d d|d d|d dd}|D ] |d  dd|d  < qW|| q2t|}	t |	j	dd d|v r|d r|d d }
|d d }t d|
 d| d d|v rt d  d!d t| d	d
dD }d"|d dd#d$|d% dd#d&|d' dd#g}|D ] t
 fd(d)|d* D d}|d  |dd# qt|}t |j	dd | jrA| j| jt| d+d,t| d-d,d.|d/< t  |d0< t| jd1}tj||d2d3 W d,   n	1 s2w   Y  t d4| j  d,S d,S )5z8Main entry point for the multimodal processor benchmark.z*Starting multimodal processor benchmark...zQ
================================================================================z&Multimodal Processor Benchmark ResultszP================================================================================r}   z
MM Processor Metrics:c                 S   rd   r   re   r,   r   r   r   r/     rf   zmain.<locals>.<listcomp>rg   rh   ri   r    z (ms)r(   z.2fr)   r*   )StageMeanMedianStdr+   r'   PF)indexr~   ru   rv   z

Summary: z total encoder calls across z
 requests.ry   z
End-to-End Latency (ms):c                 S   rd   r   re   r,   r   r   r   r/     rf   r   )Metricz
Value (ms)r   rz   r   r{   c                 3   s     | ]\}}| kr|V  qd S rO   r   )r   pctvalr1   r   r   rW     s    zmain.<locals>.<genexpr>r|   random_input_lenNrandom_output_len)rC   r   	input_lenrb   config	timestampw   )indentz
Results saved to )r   r   rB   r   r7   getr   pd	DataFrame	to_stringnextoutput_jsonrC   r   r   now	isoformatopenjsondump)rJ   resultr&   mm_datastager8   is_countunitrowmm_dftotal_callsnum_requests	e2el_datapercentile_valuee2el_dffr   r1   r   mainy  s   


"




r   __main__zBenchmark mm processor latency)description)r   )+__doc__r   r   r   r   r   typingr   numpyr4   r   r   r   vllm.benchmarks.throughputr   r   r   "vllm.multimodal.processing.contextr	   vllm.utils.gc_utilsr
   vllm.utils.import_utilsr   r   r   ImportErrorr   dictr   r   r3   r%   r<   rL   r   r   ArgumentParserr   r   __name__r   
parse_argsrJ   r   r   r   r   <module>   sb   
 
 

 SR