o
    پiH3                     @   s   d Z ddlZddlZddlZddlZddlmZmZmZ ddl	Z	ddl
mZ ddlmZmZ ddlmZmZmZmZ G dd dZdS )	zDUtilities for running nightly performance benchmarks with profiling.    N)ListOptionalTuple)kill_process_tree)BenchmarkResultgenerate_markdown_report)!DEFAULT_TIMEOUT_FOR_SERVER_LAUNCHis_in_cipopen_launch_serverwrite_github_step_summaryc                   @   s  e Zd ZdZ	d2dedededefddZd3d
dZ	d4deded	eeef fddZ			d5dede	e
 dee
df dee
df dededee	e  dee	e  ded	e	e fddZ	d4de	e ded	eejef fdd Z	d4deded	ee	e ef fd!d"Z						d6dede	e
 dee
df dee
df d#ee	e  ded$ee	e  ded%ee
 d&ee d	ee	e eee f fd'd(Zd	ee fd)d*Z	d2d+e	e dee d	dfd,d-Zd3d.d/Zd	efd0d1ZdS )7NightlyBenchmarkRunnera  Helper class for running nightly performance benchmarks with profiling.

    This class encapsulates common patterns used across nightly performance tests,
    including profile directory management, benchmark command construction,
    result parsing, and report generation.
    Nprofile_dir	test_namebase_url
gpu_configc                 C   sb   || _ || _|| _|ptjdd| _d| }| jr$|d| j d7 }|d7 }|t  | _	dS )aA  Initialize the benchmark runner.

        Args:
            profile_dir: Directory to store performance profiles
            test_name: Name of the test (used for reporting)
            base_url: Base URL for the server
            gpu_config: Optional GPU configuration string (e.g., "2-gpu-h100", "8-gpu-b200")
        
GPU_CONFIG z##  ()
N)
r   r   r   osenvirongetr   r   help_strfull_report)selfr   r   r   r   header r   M/home/ubuntu/.local/lib/python3.10/site-packages/sglang/test/nightly_utils.py__init__   s   
zNightlyBenchmarkRunner.__init__returnc                 C   s   t j| jdd dS )z1Create the profile directory if it doesn't exist.T)exist_okN)r   makedirsr   r   r   r   r   setup_profile_directory8   s   z.NightlyBenchmarkRunner.setup_profile_directoryr   
model_pathvariantc                 C   s   t t }|dd}|r%| d| d| }d| d| d| d}n| d| }d| d| d}tj| j|}||fS )a8  Generate unique profile filename and path for the model.

        Args:
            model_path: Path to the model (e.g., "deepseek-ai/DeepSeek-V3.1")
            variant: Optional variant suffix (e.g., "basic", "mtp", "nsa")

        Returns:
            Tuple of (profile_path_prefix, json_output_file)
        /_results_z.json)inttimereplacer   pathjoinr   )r   r%   r&   	timestampmodel_safe_nameprofile_filenamejson_filenameprofile_path_prefixr   r   r   generate_profile_filename<   s   z0NightlyBenchmarkRunner.generate_profile_filenameTbatch_sizes
input_lens.output_lensr3   json_output_file
extra_argsserver_argsenable_profilec
                 C   s   dddd|d| j dgdd |D d	d
d |D ddd |D dd| dd}
|	r<|r<|
ddd|g |rC|
| |rO|
d |
| |
S )a  Build the benchmark command with all required arguments.

        Args:
            model_path: Path to the model
            batch_sizes: List of batch sizes to test
            input_lens: Tuple of input lengths to test
            output_lens: Tuple of output lengths to test
            profile_path_prefix: Prefix for profile output files
            json_output_file: Path to JSON output file
            extra_args: Optional extra arguments to append to command
            server_args: Optional server launch arguments to record in metrics
            enable_profile: Whether to enable profiling (default True for NVIDIA)

        Returns:
            List of command arguments ready for subprocess.run()
        python3z-mzsglang.bench_one_batch_serverz--modelz
--base-urlz--batch-sizec                 S      g | ]}t |qS r   str.0xr   r   r   
<listcomp>|       zBNightlyBenchmarkRunner.build_benchmark_command.<locals>.<listcomp>z--input-lenc                 S   r=   r   r>   r@   r   r   r   rC   ~   rD   z--output-lenc                 S   r=   r   r>   r@   r   r   r   rC      rD   z--show-reportz--pydantic-result-filename=z--no-append-to-github-summaryz--trust-remote-codez	--profilez--profile-by-stagez--profile-output-dirz--server-args-for-metrics)r   extendappend)r   r%   r5   r6   r7   r3   r8   r9   r:   r;   commandr   r   r   build_benchmark_commandW   sR   	
	


z.NightlyBenchmarkRunner.build_benchmark_commandrG   model_descriptionc                 C   s`   t dd|  tj|ddd}|jdkr,|pd}t d| d t |j |d	fS |dfS )
a  Execute the benchmark command and return the result.

        Args:
            command: Command to execute
            model_description: Description for logging (e.g., "model_name (variant)")

        Returns:
            Tuple of (CompletedProcess, success_bool)
        zRunning command:  T)capture_outputtextr   	benchmarkzError running benchmark for :F)printr.   
subprocessrun
returncodestderr)r   rG   rI   resultdescr   r   r   run_benchmark_command   s   

z,NightlyBenchmarkRunner.run_benchmark_commandc           
   
   C   s   g }t j|s|pd}td| d|  |dfS z<t|d}t|}W d   n1 s0w   Y  |D ]}tdi |}|| q7tdt	| d|  |d	fW S  t
yz }	 z|pad}td
| d|	  |dfW  Y d}	~	S d}	~	ww )a  Load and parse benchmark results from JSON file.

        Args:
            json_output_file: Path to JSON output file
            model_description: Description for logging

        Returns:
            Tuple of (list of BenchmarkResult objects, success_bool)
        modelzWarning: JSON output file z not found for FrNzLoaded z benchmark results from Tz$Error loading benchmark results for z: r   )r   r-   existsrO   openjsonloadr   rF   len	Exception)
r   r8   rI   benchmark_resultsrU   f	json_datadatabenchmark_resulter   r   r   load_benchmark_results   s,   
z-NightlyBenchmarkRunner.load_benchmark_results
other_argsextra_bench_argstimeoutenvc                 C   s  g }d}| |rd| dnd }d}zot || j|pg |	dur!|	nt|
d}| ||\}}|r4t|ng }|r?|d|g | j|||||||||d	}| ||\}}|sg|ddfW |durft|j	 S S | 
||\}}|  }|||fW |durt|j	 S S |durt|j	 w w )	aM  Run a complete benchmark for a single model with server management.

        This method handles:
        - Server launch and cleanup
        - Profile filename generation
        - Benchmark command construction and execution
        - Result loading and parsing
        - Fetching speculative decoding accept length (for MTP/EAGLE)

        Args:
            model_path: Path to the model
            batch_sizes: List of batch sizes to test
            input_lens: Tuple of input lengths
            output_lens: Tuple of output lengths
            other_args: Arguments to pass to server launch
            variant: Optional variant suffix (e.g., "basic", "mtp")
            extra_bench_args: Extra arguments for the benchmark command
            enable_profile: Whether to enable profiling (default True for NVIDIA)
            timeout: Optional timeout for server launch (defaults to DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH)
            env: Environment dict for subprocess

        Returns:
            Tuple of (list of BenchmarkResult objects, success_bool, avg_spec_accept_length or None)
        Nr   r   r   )rW   r   rf   rh   ri   z
--run-name)r9   r:   r;   F)r
   r   r   r4   listrE   rH   rV   r   pidre   _get_spec_accept_length)r   r%   r5   r6   r7   rf   r&   rg   r;   rh   ri   r_   avg_spec_accept_lengthrI   processr3   r8   
bench_argsrG   rT   cmd_successload_successr   r   r   run_benchmark_for_model   s^   %


z.NightlyBenchmarkRunner.run_benchmark_for_modelc              
   C   s   zDt j| j ddd}|jdkr9| }|dg }|r<t|dkr?|d d}|durBtd	|d
 |W S W dS W dS W dS W dS  ty^ } ztd|  W Y d}~dS d}~ww )zQuery the server for avg_spec_accept_length metric.

        Returns:
            The average speculative decoding accept length, or None if not available.
        z/get_server_info
   )rh      internal_statesr   rm   Nz  avg_spec_accept_length=z.2fz/  Warning: Could not fetch spec accept length: )requestsr   r   status_coder[   r]   rO   r^   )r   responseserver_inforu   accept_lengthrd   r   r   r   rl   @  s.   

z.NightlyBenchmarkRunner._get_spec_accept_lengthresultsc                 C   s,   |rt | j||}|  j|d 7  _dS dS )zAdd benchmark results to the full report.

        Args:
            results: List of BenchmarkResult objects to add to report
        r   N)r   r   r   )r   r{   r&   report_partr   r   r   
add_reportT  s   z!NightlyBenchmarkRunner.add_reportc                 C   s   t  rt| j t| j dS )z2Write the final report to GitHub summary if in CI.N)r	   r   r   rO   r#   r   r   r   write_final_report`  s   
z)NightlyBenchmarkRunner.write_final_reportc                 C   s   | j S )zlGet the accumulated full report.

        Returns:
            The full markdown report as a string
        )r   r#   r   r   r   get_full_reportf  s   z&NightlyBenchmarkRunner.get_full_report)N)r    N)r   )NNT)Nr   NTNN)__name__
__module____qualname____doc__r?   r   r$   r   r4   r   r*   r   boolrH   rP   CompletedProcessrV   r   re   dictfloatrr   rl   r}   r~   r   r   r   r   r   r      s    



#



	

F

0



	

c

r   )r   r[   r   rP   r+   typingr   r   r   rv   sglang.srt.utilsr   sglang.test.nightly_bench_utilsr   r   sglang.test.test_utilsr   r	   r
   r   r   r   r   r   r   <module>   s    