o
    پi%                     @   s~  d dl Z d dlZd dlZd dlZd dlmZ d dlmZmZmZm	Z	 de
de
de	e
e
f fddZd	e
d
e
de
de
fddZd	e
d
e
de
de
fddZ		d,d	e
de
d
e
de
def
ddZdeeeef  de	eee
f ee eeef f fddZdedeeef fddZdd Zdd Zdd Zd-d!ee d"efd#d$Zed%kre jd&d'Zejd(d)d*d+ e Zeej dS dS ).    N)datetime)AnyDictListTuplebasenewreturnc                 C   s0   ||  }| dkrd}||fS ||  d }||fS )zReturns (diff, diff_percent).r           d    )r   r   diffpercentr   r   a/home/ubuntu/.local/lib/python3.10/site-packages/sglang/multimodal_gen/benchmarks/compare_perf.pycalculate_diff	   s   r   baselinerel_tolmin_abs_tolc                 C   s   | d|  }| | }t ||S )z<Calculates the upper bound for performance regression check.   )max)r   r   r   	rel_limit	abs_limitr   r   r   calculate_upper_bound      
r   c                 C   s   | d|  }| | }t ||S )z=Calculates the lower bound for performance improvement check.r   )min)r   r   r   	rel_lower	abs_lowerr   r   r   calculate_lower_bound   r   r   皙?      ^@c                 C   s4   t | ||}t| ||}||krdS ||k rdS dS )z
    Determines the status emoji based on performance difference.

    Logic:
      Upper bound (Slower): max(baseline * (1 + rel_tol), baseline + min_abs_tol)
      Lower bound (Faster): min(baseline * (1 - rel_tol), baseline - min_abs_tol)
    u   🔴u   🟢   ⚪️)r   r   )r   r   r   r   upper_boundlower_boundr   r   r   get_perf_status_emoji!   s   r#   
steps_listc                 C   s  i }i }g }t  }td}d}| D ]l}|dd}|dd}	||}
|
rT|}||vrCd||< d||< ||vrC|| || ||  |	7  < ||  d7  < q||vrnd||< d||< ||vrn|| || ||  |	7  < ||  d7  < q|||fS )	a  
    Aggregates specific repeating steps (like denoising_step_*) into groups.
    Returns:
        - aggregated_durations: {name: duration_ms}
        - ordered_names: list of names in execution order
        - counts: {name: count_of_steps_aggregated}
    z^denoising_step_(\d+)$Denoising Loopnameunknownduration_msr
   r   r   )setrecompilegetmatchappendadd)r$   	durationscountsordered_names
seen_namesdenoise_patterndenoising_group_namestepr&   durr-   keyr   r   r   consolidate_steps9   s:   







r9   	file_pathc                 C   s<   t | ddd}t|W  d   S 1 sw   Y  dS )zLoads a benchmark JSON file.rzutf-8)encodingN)openjsonload)r:   fr   r   r   _load_benchmark_filej   s   $rA   c                 C   s   | dk rdS | dkrdS dS )Ng       u   ✅g       @u   ❌r    r   )diff_pctr   r   r   #_get_status_emoji_from_diff_percentp   s
   rC   c                 C   s  | d }| dd}t||\}}	t|	}
td td td td|dd|dd	|d
d|	dd|
 d td|rAd| nddd|rKd| nddd td td td td |d \}}}|D ]\}| |d}| |d}| |d}| |d}t||\}}d}|dkr||krd| dnd| d| d}t||}td| | d|dd|dd|d
d|dd | d qkd S )!Nr   total_duration_ms#### 1. High-level Summaryz+| Metric | Baseline | New | Diff | Status |z$| :--- | :--- | :--- | :--- | :--- || **E2E Latency** | .2fz ms | z ms | **z+.2f ms (+.1fz%)** |  |z| **Throughput** | i  z	 req/s | z req/s | - | - |
#### 2. Stage BreakdownzI| Stage Name | Baseline (ms) | New (ms) | Diff (ms) | Diff (%) | Status |z+| :--- | :--- | :--- | :--- | :--- | :--- |r
   r    r%    (z steps)z->|  | z% | )r,   r   rC   printr#   )others_database_e2ecombined_orderbase_durationsothers_processedbase_countsnew_datanew_e2ediff_msrB   statusnew_durations_
new_countsstageb_valn_valb_countn_counts_diffs_pct	count_strstatus_emojir   r   r   _print_single_comparison_reporty   sF   *,
4rh   c              
   C   s  t d dd| d }dddgt|  d }t | t | d| dd	}t|D ]%\}	}
|
d
d}t| |\}}t|}|d|dd|dd| d7 }q.t | t d t d dd| d }dddgt|  d }t | t | |D ]D}||d}d| d|dd}t|D ])\}	\}}}||d}t||\}}t||}|d|dd|dd| d7 }qt | qd S )NrE   z| Metric | Baseline | rP   rJ   z| :--- | :--- | z:---rF   rG   z ms |rD   r    rH   rI   z%) rK   rL   z| Stage Name | Baseline | r
   rO   rN   )rQ   joinlen	enumerater,   r   rC   r#   )rS   rR   other_labelsrT   rU   rV   headerseprow_e2eidvalrZ   rB   r[   r_   r`   row_strn_durationsr]   n_countsra   re   rg   r   r   r   _print_multi_comparison_report   s8   "
"
rw   markdown
file_pathsoutput_formatc              
   C   s  t | dk rtd dS z	dd | D }W n ty/ } ztd|  W Y d}~dS d}~ww |d }|dd }d	d | dd D }|d
d}t|dg \}}	}
g }|D ]}t|dg \}}}||||f qZg }t|D ]\}}}|D ]}||vr|| q}qv|	D ]}||vr|| q|dkrtd t |dkrt||||||
 n	t|||||| td td td td|dd d t	|D ]\}}t |dkrdn|| }td| d|dd d qtdt
    td dS dS )z|
    Compares benchmark JSON files and prints a report.
    First file is baseline, others will be compared against it.
       z(Error: Need at least 2 files to compare.Nc                 S   s   g | ]}t |qS r   )rA   ).0r@   r   r   r   
<listcomp>   s    z&compare_benchmarks.<locals>.<listcomp>zError loading benchmark files: r   r   c                 S   s   g | ]}t j|qS r   )ospathbasename)r|   pr   r   r   r}      s    rD   stepsrx   z"### Performance Comparison Report
rK   z	<details>z<summary>Metadata</summary>
z- Baseline Commit: `commit_hashzN/A`Newz- z
 Commit: `z- Timestamp: z
</details>)rk   rQ   	Exceptionr,   r9   r.   reversedrh   rw   rl   r   now	isoformat)ry   rz   	data_liste	base_datarR   rm   rS   rU   
base_orderrW   rV   rr   r7   orderr1   rT   r]   r&   rq   labelr   r   r   compare_benchmarks   sz   




		 r   __main__z0Compare sglang-diffusion performance JSON files.)descriptionfiles+zFList of JSON files. First is baseline, others are compared against it.)nargshelp)r   r   )rx   )argparser>   r~   r*   r   typingr   r   r   r   floatr   r   r   strr#   intr9   rA   rC   rh   rw   r   __name__ArgumentParserparseradd_argument
parse_argsargsr   r   r   r   r   <module>   sX    


 
1	/.L