o
    ٷi;                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
 eeZdd Zdd Zdd	 Zd
d Zdd ZedkrJe  dS dS )    N)setup_logger)BenchmarkRecordc                  C   s  t  } | jddtdd | jddtdd | jdd	td
d | jddtdd | jddddd | jddddd | jdtddd | jdtddd | jdtddd | jdtddd | jd td!d"d# | jd$td!g d%d&d' | jd(td!g d)d*d' | jd+td,d-d | jd.ddd/d | jd0td1d2d | jd3td d4d |  }t|d5|jd6d7 	d8d9 d:|j
 d;|j }|js||_tj|jd!d< | jd=9  _|S )>Nz-b--batch-sizesz1 2)typedefaultz-s--sequence-lengthsz8 16 32 64 128 256 512z-w--warmup-runs   z-n
--num-runs  z--hf-pt-eagerF
store_truez,Benchmark in PyTorch without `torch.compile`)r   actionhelpz--hf-pt-compilez)Benchmark in PyTorch with `torch.compile`--hf-ort-dir-path zDPath to folder containing ONNX models for Optimum + ORT benchmarking)r   r   r   z--ort-msft-model-pathzAPath to ONNX model from https://github.com/microsoft/Llama-2-Onnxz --ort-convert-to-onnx-model-pathz'Path to ONNX model from convert_to_onnx--cache-dirz./model_cachez-Cache dir where Hugging Face files are stored--model-nameTzModel name in Hugging Face)r   requiredr   --precision)int4int8fp16fp32zPrecision to run model)r   r   choicesr   --device)cpucudarocmzDevice to benchmark modelsz--device-idr   zGPU device IDz	--verbosezPrint detailed logsz	--timeout
   z8Number of mins to attempt the benchmark before moving on--log-folderz'Path to folder to save logs and results
model_size/.-z./_)exist_ok<   )argparseArgumentParseradd_argumentstrint
parse_argssetattr
model_namesplitreplacer    	precision
log_folderosmakedirstimeout)parserargslog_folder_name r:   g/home/ubuntu/.local/lib/python3.10/site-packages/onnxruntime/transformers/models/llama/benchmark_all.pyget_args   s    r<   c              
   C   s  g }d\}}}d\}}}	}
d}d}d}d}d}d}d	}t |}|D ]}|d
d}||v r9t|t|d  }q"||v rHt|t|d  }q"||v rOd}q"||v rVd}q"||v rlt|t||d }|d }q"||v r~t|t||d }	q"||v rd|v rt||dd |d d }
n |||t| d  dd}t||  d }t|d }
|||||||	|
g }|	| q"W d    |S 1 sw   Y  |S )N)NNN)NNNNzBatch Size: zSequence Length: zto get past_key_valueszwith past_key_valuesz	Latency: zThroughput: zpeak=
r   promptz	per-token r   CPU=   z MB'"max_used_MB)
openr1   r,   lenfloatrfindfindjsonloadsappend)	device_idlog_filebase_resultsentries
batch_sizesequence_lengthstep	latency_s
latency_ms
throughputmemorybatch_patternsequence_patternprompt_step_patternper_token_step_patternlatency_patternthroughput_patternmemory_patternf
input_linelinepeakusageentryr:   r:   r;   process_log_file   s`   


&"
	
))rf   c                 C   sV  dd l }|j| g dd}|d d|d< |d d|d< |d d|d< |d d|d< |d	 d
|d	< |d d
|d< |d d
|d< |d d
|d< dd l}|j}tdd |D }d}d}|r}|d dd }|d dd }g }	| D ]\}
}|d dv rt|d |d d|d ||}n)|d dv rt|d |d d|d t	j
t	j}nt|d |d |d |d dd}|d |j_|d |j_|d |j_|d |j_|d |jjd< |d |jjd< |d	 |jjd< |d |j_|d |jjd< |d |j_|	| qt||	 t|d d!|	 td"| d# d S )$Nr   )Warmup RunsMeasured Runs
Model NameEngine	PrecisionDevice
Batch SizeSequence LengthStepLatency (s)Latency (ms)Throughput (tps)Memory (GB))columnsrg   r,   rh   rm   rn   rp   rH   rq   rr   rs   c                 S   s(   g | ]}|j d v r|j  d|j qS ))onnxruntimezonnxruntime-gpu==)keyversion).0ir:   r:   r;   
<listcomp>   s   ( z save_results.<locals>.<listcomp>r   rv   rB   rj   )optimum-ortru   ri   rk   ru   rl   )pytorch-eagerpytorch-compilepytorchro   measure_stepenginelatency_s_meanthroughput_tps.csvz.jsonzResults saved in !)pandas	DataFrameastypepkg_resourcesworking_setsortedr0   iterrowsr   torch__name____version__configwarmup_runsmeasured_runsrR   
seq_length
customizedmetricslatency_ms_meanmax_memory_usage_GBrM   save_as_csvsave_as_jsonr1   loggerinfo)resultsfilenamepddfr   installed_packagesinstalled_packages_listort_pkg_nameort_pkg_versionrecordsr%   rowrecordr:   r:   r;   save_results   s`   "r   c           	   	   C   s   | dt j  dd}tj| j|}t|d'}tj|||d}z|	| j
 W n tjy8   |  Y nw W d    n1 sCw   Y  td | j| j| j|| j| jg}t| j||}|S )Nr%   %Y-%m-%d_%H:%M:%Sz.logw)stdoutstderrz Gathering data from log files...)datetimenowr4   pathjoinr3   rF   
subprocessPopenwaitr6   TimeoutExpiredkillr   r   r   num_runsr/   r2   devicerf   rN   )	r8   benchmark_cmdr   log_filenamelog_pathrO   processrP   r   r:   r:   r;   	benchmark$  s   
r   c                  C   s  t  } t| j t| j dtjj_	g }t
| jtjd< | jrWdddddd| jd	| jd
| jd| jd| jdt
| jdt
| jd| jd| jdg}td t	| |d}|| | jrdddddd| jd	| jd
| jd| jd| jdt
| jdt
| jd| jd| jdg}td t	| |d}|| | jrdddddd| jd| jd	| jd
| jd| jd| jdt
| jdt
| jd| jd| jdg}td t	| |d}|| | jrdddddd| jd| jd	| jd
| jd| jd| jdt
| jdt
| jd| jd| jg}td t	| |d}|| | jrHdddddd| jd| jd	| jd
| jd| jd| jdt
| jdt
| jd| jd| jg}td t	| |d }|| | j d!| j d!tj d"d#}t |tj!"| j| d S )$NTCUDA_VISIBLE_DEVICESpythonz-mzmodels.llama.benchmarkz--benchmark-typezhf-pt-eagerr   r   r   r   r   r   r
   r   r   z--authz'Benchmark PyTorch without torch.compiler}   zhf-pt-compilez$Benchmark PyTorch with torch.compiler~   zhf-ortr   z Benchmark Optimum + ONNX Runtimer|   zort-msftz--ort-model-pathz)Benchmark Microsoft model in ONNX Runtimezort-convert-to-onnxz/Benchmark convert_to_onnx model in ONNX Runtimeru   r%   r   r   )#r<   r   verboser   r   __dict__r   backendscudnnr   r+   rN   r4   environhf_pt_eagerr/   r2   batch_sizessequence_lengthsr   r   r   r3   	cache_dirextendhf_pt_compilehf_ort_dir_pathort_msft_model_pathort_convert_to_onnx_model_pathr    r   r   r   r   r   )r8   all_resultsr   r   csv_filer:   r:   r;   main6  s:  











"r   __main__)r(   r   rK   loggingr4   r   r   benchmark_helperr   r   r   	getLoggerr   r   r<   rf   r   r   r   r:   r:   r:   r;   <module>   s(   
 9M 2
