o
    )wiaO                     @   s:  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dl
m
Z
 d dlmZ d dlZd dlZd dlZd dlmZmZ e	G dd dZe	G dd	 d	Z		
	d&ddZdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Z d!d" Z!d#d$ Z"e#d%krdZ$e"  dS dS )'    N)	dataclass)datetime)Path)generate_test_dataget_bert_inputsc                   @   sv   e Zd ZU eed< eed< eed< eed< eed< eed< eed< eed< eed	< eed
< eed< eed< eed< dS )TestSetting
batch_sizesequence_length
test_cases
test_timesuse_gpuuse_io_bindingproviderintra_op_num_threadsseedverboselog_severityaverage_sequence_lengthrandom_sequence_lengthN)__name__
__module____qualname__int__annotations__boolstr r   r   d/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/onnxruntime/transformers/bert_perf_test.pyr   !   s   
 r   c                   @   sV   e Zd ZU eed< eed< eed< eed< eed< edB ed< edB ed< eed	< dS )
ModelSetting
model_pathinput_ids_namesegment_ids_nameinput_mask_name	opt_levelNinput_tuning_resultsoutput_tuning_results	mask_type)r   r   r   r   r   r   r   r   r   r   r   2   s   
 r      c                 C   sL  dd l }|| |rd| vrtd |rI|dkr ddg}n,|dkr)ddg}n#|d	kr2g d
}n|dkr;ddg}n|dkrDg d}nddg}ndg}| }	||	_|jj|	_|d u rb|j	j
|	_n+|dkrl|j	j|	_n!|dkrv|j	j|	_n|dkr|j	j|	_n|dkr|j	j
|	_n||	_|d ur||	_|j| |	|d}
|r|dkrd|
 v sJ nV|dkrd|
 v sJ nI|d	krd|
 v sJ d|
 v sJ n4|dkrd|
 v sJ n'|dkrd|
 v sJ d|
 v sJ nd|
 v sJ n	d|
 v sJ |d ur$t|}|
t| W d    |
S 1 sw   Y  |
S )Nr   CUDAExecutionProviderzWarning: Please install onnxruntime-gpu package instead of onnxruntime, and use a machine with GPU for testing gpu performance.dmlDmlExecutionProviderCPUExecutionProviderrocmROCMExecutionProvidermigraphx)MIGraphXExecutionProviderr-   r+   cudatensorrt)TensorrtExecutionProviderr(   r+      r'   c   )	providersr/   r2   )onnxruntimeset_default_logger_severityget_available_providersprintSessionOptionslog_severity_levelExecutionModeORT_SEQUENTIALexecution_modeGraphOptimizationLevelORT_ENABLE_ALLgraph_optimization_levelORT_DISABLE_ALLORT_ENABLE_BASICORT_ENABLE_EXTENDEDr   InferenceSessionget_providersopenset_tuning_resultsjsonload)r   r   r   r   rA   r   tuning_results_pathr6   execution_providerssess_optionssessionfr   r   r   create_session>   st   	










rP   c                 C   s,   t jtjt jtjt jtjt jtji}||  S )N)torchfloat32npfloat16int32int64longlong)
torch_typetype_mapr   r   r   
numpy_type   s   rZ   c                    s4    fdd|   D } fdd|  D }||fS )Nc                    "   i | ]\}}|t | qS r   rQ   
from_numpyto.0namearraydevicer   r   
<dictcomp>      " z/create_input_output_tensors.<locals>.<dictcomp>c                    r[   r   r\   r_   rc   r   r   re      rf   )items)inputsoutputsrd   input_tensorsoutput_tensorsr   rc   r   create_input_output_tensors   s   rl   c              
   C   sx   |   }| D ]\}}|||jjdt|j|j|  q| D ]\}}|	||jjdt|j|j|  q#|S Nr   )

io_bindingrg   
bind_inputrd   typerZ   dtypeshapedata_ptrbind_output)sessrj   rk   rn   ra   tensorr   r   r   create_io_binding   s(   rw   c                 C   s   g }g }|j r	dnd}t|D ]I\}}| ||}	||	 i }
tt|D ]
}|	| |
|| < q&t||
|\}}t| ||}| | t	
 }| | t	
 | }|| q||fS )Nr0   cpu)r   	enumeraterunappendrangelenrl   rw   run_with_iobindingtimeitdefault_timer)rN   
all_inputsoutput_namestest_settingresultslatency_listrd   _test_case_idrh   resultri   irj   rk   rn   
start_timelatencyr   r   r   %onnxruntime_inference_with_io_binding   s"   


r   c           
      C   st   t |dkr| |t| g }g }t|D ]\}}t }| ||}t | }	|| ||	 q||fS rm   )r}   rz   randomchoicery   r   r   r{   )
rN   r   r   r   r   r   rh   r   r   r   r   r   r   onnxruntime_inference   s   
r   c                 C   s   |  }dtj|  d}|d|j d|j ddd7 }|d|j d|j d7 }|d	|j	 d
|j
 d7 }|d|j d|j d7 }|d|j d7 }|d|j 7 }|S )Nzmodel=,zgraph_optimization_level=z,intra_op_num_threads=zGraphOptimizationLevel.ORT_ zbatch_size=z,sequence_length=ztest_cases=z,test_times=zuse_gpu=z,use_io_binding=zaverage_sequence_length=zrandom_sequence_length=)get_session_optionsospathbasenamerA   r   replacer   r	   r
   r   r   r   r   r   )r   rN   r   rM   optionr   r   r   	to_string   s   r   c              	   C   s   t | j|j|j|| j|j| jd}dd | D }t| j||}||v r,t	d| d S t	d| g }|j
rMt|jD ]}	t||||\}
}|| q;nt|jD ]}	t|||\}
}|| qRt|d }t|}t|d}t|d}t|d	}t|d
}t|d}|jd|  }|||||||f||< t	dt|dt|d | jrtj| j}tj|r|}|ddd  dt   d}t	d|d|d |  }t!|d}t"#|| W d    n1 sw   Y  t	d| d S d S )N)r   rK   c                 S   s   g | ]}|j qS r   )ra   )r`   outputr   r   r   
<listcomp>   s    z run_one_test.<locals>.<listcomp>zskip duplicated test:zRunning test:  2   K   Z   _   r4   g     @@z,Average latency = {} ms, Throughput = {} QPS.2fz.jsonr3   r   .zWARNING:zexists, will write tozinstead.wzTuning results is saved to)$rP   r   r   r   r#   r   r$   get_outputsr   r9   r   r|   r   r   extendr   rS   rb   
statisticsmean
percentiler   formatr%   r   r   abspathexistsrsplitr   now	timestampget_tuning_resultsrG   rI   dump)model_settingr   perf_resultsr   r   rN   r   keyall_latency_list_ir   r   
latency_msaverage_latency
latency_50
latency_75
latency_90
latency_95
latency_99
throughputoutput_pathold_output_pathtrsrO   r   r   r   run_one_test   sr   	



$r   c                 C   s,   t jt| ||||fd}|  |  d S )N)targetargs)multiprocessingProcessr   startjoin)r   r   r   r   r   processr   r   r   launch_test;  s   
r   c           	      C   s   |j d urt| ||||j  d S tjdd}tjdd}t||h}tdtd|D ]}||vr5|| q*|jdd |D ]
}t| |||| q>d S )NF)logicalTr3      )reverse)	r   r   psutil	cpu_countlistr|   minr{   sort)	r   r   r   r   r   logical_corescandidate_threadsr   r   r   r   r   run_perf_testsJ  s(   

r   c                 C   s|   t | j| j| j| j\}}}td|j d|j d|j  t	|j|j|j|j
|j||||j|j| jd}t| ||| d S )NzGenerating z samples for batch_size=z sequence_length=)r&   )r   r   r    r!   r"   r9   r
   r   r	   r   r   r   r   r   r&   r   )r   r   r   	input_idssegment_ids
input_maskr   r   r   r   run_performanceb  s.   
r   c                  C   s  t  } | jddtdd | jdddtddd	 | jd
ddtdd | jddtddd | jdddtddd | jddtg dddd | jddtddd | jdddd d! | jdd" | jd#dtd$g d%d&d' | jd(ddd)d! | jdd* | jd+ddd,d! | jdd- | jd.dtd d/d | jd0d1dtd d2d | jd3dtd d4d | jd5dtd d6d | jd7dtd d8d | jd9d td:d; | jd<d td=d; | jd>d?d@tdAd; | jdBdCdddDd! | jddE | jdFdtd$dGd |  }|S )HNz--modelTzbert onnx model path)requiredrp   helpz-bz--batch_size+zKbatch size of input. Allow one or multiple values in the range of [1, 128].)r   rp   nargsr   z-sz--sequence_lengthz maximum sequence length of inputz	--samplesF
   z!number of samples to be generated)r   rp   defaultr   z-tz--test_timesr   zJnumber of times to run per sample. By default, the value is 1000 / samplesz--opt_level)r   r3   r'   r4   r4   zZonnxruntime optimization level: 0 - disable all, 1 - basic, 2 - extended, 99 - enable all.)r   rp   choicesr   r   z--seed   zPrandom seed. Use the same seed to make sure test data is same in multiple tests.z	--verbose
store_truezprint verbose information)r   actionr   )r   z--log_severityr'   )r   r3   r'   r      z.0:Verbose, 1:Info, 2:Warning, 3:Error, 4:Fatal)r   rp   r   r   r   z	--use_gpuzuse GPU)r   z--use_io_bindingzuse io_binding)r   z
--providerzExecution provider to usez-nz--intra_op_num_threadsz>=0, set intra_op_num_threadsz--input_ids_namezinput name for input idsz--segment_ids_namezinput name for segment idsz--input_mask_namezinput name for attention maskz--input_tuning_resultsz3tuning results (json) to be loaded before benchmark)r   rp   r   z--output_tuning_resultsz1tuning results (json) to be saved after benchmarkz-az--average_sequence_lengthz)average sequence length excluding paddingz-rz--random_sequence_lengthz3use uniform random instead of fixed sequence length)r   z--mask_typezmmask type: (1: mask index or sequence length, 2: raw 2D mask, 3: key len, cumulated lengths of query and key))argparseArgumentParseradd_argumentr   r   set_defaults
parse_args)parserr   r   r   r   parse_arguments  s  					r   c                  C   s  t  } | jdkrtdtd| j | _| jdkr| j| _t }|	 }t
| j}t|dkr5t|dks9tdt| j| j| j| j| j| j| j| j}|D ])}t|| j| j| j| j| j| j| j| j| j| j| j| j}t d| t!||| qNt"|# ddd	 d
}t$j%&t'| jj(d)| jrdndd&dd t"|D | jt*+ ,d}t-|dddK}	t.j/|	ddd}
d }|D ]6\}}|0d}|d u rg d}|1dd |D  |
2| dd |D }|1dd |D  |
2| qW d    n1 sw   Y  t d| d S )Nr   r3   r      z batch_size not in range [1, 128]ztest settingFc                 S   s   | d S )Nr3   r   )xr   r   r   <lambda>Q  s    zmain.<locals>.<lambda>)r   r   zperf_results_{}_B{}_S{}_{}.txtGPUCPU-c                 S   s   g | ]}t |qS r   )r   r`   r   r   r   r   r   W  s    zmain.<locals>.<listcomp>z%Y%m%d-%H%M%Szw+r   )newline	
)	delimiterlineterminatorr   )zLatency(ms)Latency_P50Latency_P75Latency_P90Latency_P95Latency_P99zThroughput(QPS)c                 S      g | ]	}| d d qS )=r   splitr   r   r   r   r   k      c                 S   s   g | ]}t |d qS )r   )r   r   r   r   r   r   n  s    c                 S   r   )r   r3   r   r   r   r   r   r   o  r   zTest summary is saved to)3r   r   maxr   samplesr   r	   r   Managerdictsetr   r   	Exceptionr   modelr    r!   r"   r#   r$   r%   r&   r   r   r   r   r   r   r   r   r   r9   r   sortedrg   r   r   r   r   parentr   r   r   strftimerG   csvwriterr   r   writerow)r   managerr   batch_size_setr   r   r   sorted_resultssummary_filetsv_file
tsv_writerheadersr   perf_resultparamsvaluesr   r   r   main!  s~   




	
	
r  __main__)Nr'   N)%r   r
  rI   r   r   r   r   r   dataclassesr   r   pathlibr   numpyrS   r   rQ   bert_test_datar   r   r   r   rP   rZ   rl   rw   r   r   r   r   r   r   r   r   r  r   __spec__r   r   r   r   <module>   sP   
Y
E #T
