o
    
۾i4                     @   s  d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlmZ d dl	m
Z
mZmZ d dlmZ ddlmZmZ dd	lmZmZmZ dd
lmZ ddlmZmZ ddlmZ zd dlZW n eyi   edZY nw zd dl m!Z! W n ey   ed"d"dZ!Y nw dededefddZ#dedede$de%dB fddZ&dede%dB fdd Z'dede$fd!d"Z(ded#ed$ede$def
d%d&Z)d'edB d(e*e$ dededed)e%d*e+fd+d,Z,ed- Z-G d.d/ d/e.e%e/f Z0ded0e*e.e$e1f  fd1d2Z2dd3d4d'edB d(e*e$ dedededed)e%d*e+de-d5e%d6e%fd7d8Z3d'edB d(e*e$ dededede-ded)e%d*e+fd9d:Z4d;e*e$ d(e*e$ d<e*e$ d=e+d>ed?ed@ede-ded)e%d*e+fdAdBZ5eG dCdD dDeZ6dEe6fdFdGZ7dEe j8fdHdIZ9e:dJkre j;e6j<dKZ=e6>e= e9e=?  dS dS )L    N)asdict	dataclass)datetime)Path)ClassVarLiteralget_args)PlaceholderModule   )ParameterSweepParameterSweepItem)SweepServeArgsrun_benchmark
run_server)ServerProcess)SLASweepSLASweepItem)sanitize_filenamepandas)PchipInterpolatorscipyinterpolater   
output_dir
serve_comb
bench_combc                 C   sP   t t  }|r|d|jddf |r|d|jddf | td| S )NzSERVE--sepzBENCH-)liststrextendas_textr   join)r   r   r   parts r$   S/home/ubuntu/.local/lib/python3.10/site-packages/vllm/benchmarks/sweep/serve_sla.py_get_sla_base_path"   s   
r&   	base_pathsla_combsla_variable	sla_valuec                 C   s6   |d u r|j dd}| d| d S | | d|  S )Nr   r   zSLA--.json=)r!   )r'   r(   r)   r*   prefixr$   r$   r%   _get_sla_iter_path0   s   r.   	iter_path
run_numberc                 C   s    |d u r| d S | d| d S )Nsummary.jsonzrun=r+   r$   )r/   r0   r$   r$   r%   _get_sla_run_path=   s   r2   c                 c   sL    |  | dD ]}t|j| d}|d }| r#||fV  q	d S )Nz=*r,   r1   )globintnameremoveprefixexists)r'   r)   r/   r*   summary_pathr$   r$   r%   _iter_sla_val_pathsD   s   
r9   bench_combs	sla_combsc                 C   s@   |D ]}t || |}|D ]}t|||d d s  dS qqdS )Nr*   TF)r&   r.   r7   )r   r:   r;   r)   r   r   r'   r(   r$   r$   r%   _sla_needs_serverL   s   	r=   server	bench_cmdnum_runsdry_runc             
   C   s   t tttf   }t|D ]}t| ||||t|||d}	|	d ur&||	 q|r+d S t|d dd}
t	j
||
dd W d    |S 1 sHw   Y  |S )N)serve_overridesbench_overridesr0   output_pathrA   )r0   w   indent)r   dictr   objectranger   r2   appendopenjsondump)r>   r?   r   r   r/   r@   rA   	iter_datar0   run_datafr$   r$   r%   run_slaa   s,   



rS   )request_ratemax_concurrencyc                       sd   e Zd Zdededdf fddZdeee ee f fddZdefd	d
Z	defddZ
  ZS )
SLAHistory	min_value	max_valuereturnNc                    s   t    || _|| _d S N)super__init__rW   rX   )selfrW   rX   	__class__r$   r%   r\      s   

zSLAHistory.__init__c                 C   sF   t t  }t t  }t|  D ]\}}|| || q||fS rZ   )r   r4   floatsorteditemsrL   )r]   xsysxyr$   r$   r%   get_xy   s   


zSLAHistory.get_xyc                 C      t dd |  D | jdS )Nc                 s   s     | ]\}}|d kr|V  qdS r   Nr$   .0valmarginr$   r$   r%   	<genexpr>       z-SLAHistory.get_max_passing.<locals>.<genexpr>default)maxrb   rW   r]   r$   r$   r%   get_max_passing      zSLAHistory.get_max_passingc                 C   rh   )Nc                 s   s     | ]\}}|d kr|V  qdS ri   r$   rj   r$   r$   r%   rn      ro   z-SLAHistory.get_min_failing.<locals>.<genexpr>rp   )minrb   rX   rs   r$   r$   r%   get_min_failing   ru   zSLAHistory.get_min_failing)__name__
__module____qualname__r4   r\   tupler   r`   rg   rt   rw   __classcell__r$   r$   r^   r%   rV      s
    	rV   rP   c                    s<    sJ d fdd| D fdd|   D }t|S )NzSummary should not be emptyc                    s,   i | ]  t  fd dD t qS )c                 3   s    | ]	}t |  V  qd S rZ   )r`   )rk   rQ   kr$   r%   rn      s    z-_compute_margin.<locals>.<dictcomp>.<genexpr>)sumlen)rk   )rP   r}   r%   
<dictcomp>   s    z#_compute_margin.<locals>.<dictcomp>c                    s   g | ]
\}}|  |qS r$   )print_and_compute_margin)rk   r~   	criterion)iter_data_meanr$   r%   
<listcomp>   s    
z#_compute_margin.<locals>.<listcomp>)rb   rr   )r(   rP   sla_marginsr$   )rP   r   r%   _compute_margin   s   

r   i    )sla_min_valuesla_max_valuer   r   c       	      
   C   s  t tttf   }t|	|
d}t||D ]%\}}|d}t|}W d    n1 s-w   Y  t	||||< q|
 d | k rt||	d|
k rO|
}n9t||
d|	krZ|	}n.t| ddi}| }t|dkrzt|
 |  d }nt|d }||v r|d7 }t|	t||
}td	| d
| d t| |||||iB t||||||d}|d u rd S t	||}|dkrtd|dd n	td|dd || |||< |
 d | k sD||fS )N)rW   rX   rbr
   rp   extrapolateFr      zTesting z: z req/s)r   r   r/   r@   rA   zSLA criteria are met. (margin=z.2f)z"SLA criteria are not met. (margin=)r   rI   r   rJ   rV   r9   rM   rN   loadr   rt   rw   rr   rv   r   rg   solver   r4   printrS   r.   r    )r>   r?   r   r   r(   r'   r@   rA   r)   r   r   sla_datahistorypast_sla_valuepathrR   past_iter_datarl   spl	spl_rootsrP   rm   r$   r$   r%   	solve_sla   sP   
	

+r   c                C   s   t d t d|   t| ||||||||d	}	|	d u r,|s"J t d t d d S |	\}
}| }t d| d| d t|||d d	d
}tj|
|dd W d    n1 s]w   Y  t d |
S )Nz[SLA START]zSLA criteria: )r   r   r(   r'   r@   rA   r)   zOmitting SLA search.z	[SLA END]zMaximum z
 for SLA: z req/s.r<   rE   rF   rG   )r   r!   r   rt   r.   rM   rN   rO   )r>   r?   r   r   r(   r)   r'   r@   rA   resultr   sla_historyr*   rR   r$   r$   r%   
search_sla   sD   r   	serve_cmdafter_bench_cmdshow_stdoutserve_paramsbench_params
sla_paramsc                   s   t  fdd|D rtd  dttttf   }|D ]M}t||| |r1t| ||||
dnt	 .}|D ]#}|D ]}t
|||}t||||| ||	|
d	}|d urZ|| q<q8W d    n1 sfw   Y  q|
rpd S tj|}||d  |S )Nc                 3   s    | ]}|  V  qd S rZ   )	has_param)rk   r   r)   r$   r%   rn   =  s    zrun_slas.<locals>.<genexpr>zYou should not override `zU` in `bench_params` in SLA mode, since it is supposed to be determined automatically.)r   rB   rA   )r   r   r(   r)   r'   r@   rA   zsummary.csv)any
ValueErrorr   rI   r   rJ   r=   r   
contextlibnullcontextr&   r   r    pd	DataFramefrom_recordsto_csv)r   r?   r   r   r   r   r   r)   r   r@   rA   all_datar   r>   r   r(   r'   	comb_datacombined_dfr$   r   r%   run_slas/  sb   
	
$r   c                       st   e Zd ZU eed< eed< dZee ed< dZ	ee ed< e
dejfdd	Ze
d
ejdejf fddZ  ZS )SweepServeSLAArgsr   r)   	serve_slaparser_namez5Tune a variable to meet SLAs under multiple settings.parser_helpargsc                 C   sF   t |}|jrt|j}ntg }| di t|||jdS )N)r   r)   r$   )r   from_cli_argsr   r   	read_jsonr   r   r)   )clsr   	base_argsr   r$   r$   r%   r   z  s   


zSweepServeSLAArgs.from_cli_argsparserrY   c                    sD   t  |}|d}|jdtddd |jdtttddd	 |S )
Nzsla optionsz--sla-paramsTa  Path to JSON file containing a list of SLA constraints to satisfy. Each constraint is expressed in `{"<KEY>": "<OP><VALUE>"}` format, e.g.: `{"p99_e2el_ms": "<=500"}` means that the E2E latency should be less than 500ms 99%% of the time. Setting this option runs this script in SLA mode, which searches for the maximum `sla_variable` that satisfies the constraints for each combination of `serve_params`, `bench_params`, and `sla_params`.)typerequiredhelpz--sla-variablerT   zSWhether to tune request rate or maximum concurrency to satisfy the SLA constraints.)r   choicesrq   r   )r[   add_cli_argsadd_argument_groupadd_argumentr   r   SLAVariable)r   r   	sla_groupr^   r$   r%   r     s    
	zSweepServeSLAArgs.add_cli_args)rx   ry   rz   r   __annotations__r   r   r   r   r   classmethodargparse	Namespacer   ArgumentParserr   r|   r$   r$   r^   r%   r   r  s   
 $r   r   c                 C   s   | j p	t d}| j| }| j r| std| dzt| j| j	| j
| j| j| j| j| j|| j| jdW S  tyM } z	td| d|d }~ww )Nz%Y%m%d_%H%M%Sz+Cannot resume from non-existent directory (r   )r   r?   r   r   r   r   r   r)   r   r@   rA   z/The script was terminated early. Use `--resume z2` to continue the script from its last checkpoint.)resumer   nowstrftimer   r7   r   r   r   r?   r   r   r   r   r   r)   r@   rA   BaseExceptionRuntimeError)r   	timestampr   excr$   r$   r%   run_main  s4   

r   c                 C   s   t t|  d S rZ   )r   r   r   )r   r$   r$   r%   main  s   r   __main__)description)@r   r   rN   dataclassesr   r   r   pathlibr   typingr   r   r   vllm.utils.import_utilsr	   param_sweepr   r   server   r   r   r>   r   	sla_sweepr   r   utilsr   r   r   ImportErrorscipy.interpolater   placeholder_attrr&   r   r4   r.   r2   r9   r=   r   boolrS   r   rI   r`   rV   rJ   r   r   r   r   r   r   r   r   rx   r   r   r   r   
parse_argsr$   r$   r$   r%   <module>   s2  



#
	

G	

1	

C4

