o
    پi                     @   s   d Z ddlZddlmZ ddlmZmZ ddlmZ dedefdd	Z	e
d
krJe Zee ee e ZeeZeeZe	ee dS dS )af  
Benchmark the latency of running a single batch with a server.

This script launches a server and uses the HTTP interface.
It accepts server arguments (the same as launch_server.py) and benchmark arguments (e.g., batch size, input lengths).

Usage:
python3 -m sglang.bench_one_batch_server --model meta-llama/Meta-Llama-3.1-8B --batch-size 1 16 64 --input-len 1024 --output-len 8

python3 -m sglang.bench_one_batch_server --model None --base-url http://localhost:30000 --batch-size 16 --input-len 1024 --output-len 8
python3 -m sglang.bench_one_batch_server --model None --base-url http://localhost:30000 --batch-size 16 --input-len 1024 --output-len 8 --show-report --profile --profile-by-stage
python3 -m sglang.bench_one_batch_server --model None --base-url http://localhost:30000 --batch-size 16 --input-len 1024 --output-len 8 --output-path results.json --profile
    N)
ServerArgs)	BenchArgsrun_benchmark_internal)save_results_as_pydantic_modelsserver_args
bench_argsc                 C   s2   t | |\}}|jrt||j| j|jd ||fS )N)pydantic_result_filename
model_pathr   )r   r   r   r	   server_args_for_metrics)r   r   resultsserver_info r   Q/home/ubuntu/.local/lib/python3.10/site-packages/sglang/bench_one_batch_server.pyrun_benchmark   s   r   __main__)__doc__argparsesglang.srt.server_argsr   +sglang.test.bench_one_batch_server_internalr   r   sglang.test.nightly_bench_utilsr   r   __name__ArgumentParserparseradd_cli_args
parse_argsargsfrom_cli_argsr   r   r   r   r   r   <module>   s    



