o
    پi                     @   s$  d dl mZmZ d dlmZmZmZ d dlmZ d dl	m
Z
 d dlmZmZ eG dd dZeG dd	 d	Z	
					
d!dede
dee deedf deedf dededee defddZ	
	
				d"dee dededee dee deedf deedf dededefdd Zd
S )#    )	dataclassfield)ListOptionalTuple)BenchmarkResult)NightlyBenchmarkRunner)DEFAULT_URL_FOR_TESTModelLaunchSettingsc                   @   s   e Zd ZU dZedd dZee ed< dZ	e
edf ed< d	Ze
edf ed
< dZee ed< dZeed< dZee ed< dS )PerformanceTestParamsz#Parameters for performance testing.c                   C   s   g dS )N         @    r   r   r   W/home/ubuntu/.local/lib/python3.10/site-packages/sglang/test/performance_test_runner.py<lambda>   s    zPerformanceTestParams.<lambda>)default_factorybatch_sizesi   .
input_lensi   output_lensNprofile_dirmmmudataset_namespec_accept_length_threshold)__name__
__module____qualname____doc__r   r   r   int__annotations__r   r   r   r   r   strr   r   floatr   r   r   r   r   	   s   
 r   c                   @   s   e Zd ZU dZeed< eed< ee ed< dZee	 ed< dZ
ee	 ed< dZee	 ed< dZee	 ed	< dZeee  ed
< dZee	 ed< dS )PerformanceTestResultzlResult of a performance test.

    Aggregates metrics across all batch sizes tested for a single model.
    modelpassederrorNlatencyinput_throughputoutput_throughputoverall_throughputbenchmark_resultsavg_spec_accept_length)r   r   r    r!   r$   r#   boolr   r*   r%   r+   r,   r-   r.   r   r   r/   r   r   r   r   r&      s   
 r&   Nr   r   Fr   r'   perf_runnerr   r   .r   is_vlmr   r   returnc                 C   s,  |d u rg d}t dd  t d| j  t d| j  t d|  t d|  t d|  |d ur?t d	|  t d d d
g}|rS|d|  z|j| j|||| j| jpad|| jd\}	}
}|
r|	r|j|	| jd t d| j  d }d}|d ur|d u rd}d}t d|  n"||k rd|dd| }d}t d|  nt d|dd|  t|	dd d}t	| j|||j
|j|j|j|	|d	W S d| j }t d|  t	| jd|dW S  ty } z d| j d t| }t d|  t	| jd|dW  Y d }~S d }~ww )!Nr   
<============================================================zRunning PERFORMANCE test for z  Variant: z  Batch sizes: z  Input lens: z  Output lens: z   Spec accept length threshold: z--trust-remote-codez--dataset-name= )
model_pathr   r   r   
other_argsvariantextra_bench_argsenv)r9   u#   ✓ Performance test succeeded for Tz>Spec accept length threshold set but no accept length reportedFu   ✗ zSpec accept length z.2fz < threshold u   ✓ Spec accept length z >= threshold c                 S   s   | j S )N)
batch_size)rr   r   r   r   o   s    z&run_performance_test.<locals>.<lambda>)key)	r'   r(   r)   r*   r+   r,   r-   r.   r/   zPerformance test failed for )r'   r(   r)   zPerformance test exception for : )printr7   r9   appendrun_benchmark_for_model
extra_argsr;   
add_reportmaxr&   r*   r+   r,   r-   	Exceptionr$   )r'   r1   r   r   r   r2   r   r   r:   resultssuccessr/   	error_msgr(   largest_batch_resulter   r   r   run_performance_test+   s   

rL   modelsr   	test_namebase_urlc	              
   C   sR  |pt }t|||d}	|	  g }
d}| D ]7}td td|j  td|j  td|j  td t||	|||||d}|
| |j	sLd	}q|	
  td
 td|  td |
D ].}|j	ridnd}|jrvd|jddnd}td|j d| |  |jrtd|j  qbtd
 td|rdnd  td ||
dS )a  Run performance tests for multiple models.

    Args:
        models: List of ModelLaunchSettings to test
        profile_dir: Directory for performance profiles
        test_name: Name for the test (used in reports)
        base_url: Server base URL (default: DEFAULT_URL_FOR_TEST)
        batch_sizes: Batch sizes for perf test
        input_lens: Input lengths
        output_lens: Output lengths
        is_vlm: Whether these are VLM models
        dataset_name: Dataset name for VLM benchmarks

    Returns:
        dict with results:
        {
            "all_passed": bool,
            "results": [PerformanceTestResult, ...]
        }
    )r   rN   rO   TzQ
================================================================================zPERFORMANCE TEST: z  TP Size: z  Extra Args: zP================================================================================)r'   r1   r   r   r   r2   r   Fz=
============================================================zPerformance Test Summary: r5   PASSFAILz
, output: z.1fz tok/sr6   z  r?   z    Error: z	OVERALL: z
ALL PASSEDzSOME FAILEDz=============================================================
)
all_passedrG   )r	   r   setup_profile_directoryr@   r7   tp_sizerC   rL   rA   r(   write_final_reportr,   r'   r)   )rM   r   rN   rO   r   r   r   r2   r   r1   all_resultsrR   r'   resultstatusthroughput_strr   r   r   run_performance_for_models   s`   

rZ   )Nr   r   Fr   N)NNr   r   Fr   )dataclassesr   r   typingr   r   r   sglang.test.nightly_bench_utilsr   sglang.test.nightly_utilsr   sglang.test.test_utilsr	   r
   r   r&   r"   r0   r$   r%   rL   dictrZ   r   r   r   r   <module>   sz    

	
g

	
