o
    hiP                     @   s:  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dl
m
Z
 d dlmZ d dlZd dlZd dlZd dlmZmZ e	G dd dZe	G dd	 d	Z		
	d&ddZdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Z d!d" Z!d#d$ Z"e#d%krdZ$e"  dS dS )'    N)	dataclass)datetime)Path)generate_test_dataget_bert_inputsc                   @   sv   e Zd ZU eed< eed< eed< eed< eed< eed< eed< eed< eed	< eed
< eed< eed< eed< dS )TestSetting
batch_sizesequence_length
test_cases
test_timesuse_gpuuse_io_bindingproviderintra_op_num_threadsseedverboselog_severityaverage_sequence_lengthrandom_sequence_lengthN)__name__
__module____qualname__int__annotations__boolstr r   r   e/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/onnxruntime/transformers/bert_perf_test.pyr   !   s   
 r   c                   @   sV   e Zd ZU eed< eed< eed< eed< eed< edB ed< edB ed< eed	< dS )
ModelSetting
model_pathinput_ids_namesegment_ids_nameinput_mask_name	opt_levelNinput_tuning_resultsoutput_tuning_results	mask_type)r   r   r   r   r   r   r   r   r   r   r   2   s   
 r      c                 C   sd  dd l }|| |rd| vrtd |rI|dkr ddg}n,|dkr)ddg}n#|d	kr2g d
}n|dkr;ddg}n|dkrDg d}nddg}ndg}| }	||	_|jj|	_|d u rb|j	j
|	_n5|dkrl|j	j|	_n+|dkrv|j	j|	_n!|dkr|j	j|	_n|dkr|j	j|	_n|dkr|j	j
|	_n||	_|d ur||	_|j| |	|d}
|r|dkrd|
 v sJ nW|dkrd|
 v sJ nJ|d	krd|
 v sJ d|
 v sJ n5|dkrd|
 v sJ n(|dkrd|
 v sJ d|
 v sJ nd|
 v sJ n	d|
 v sJ |d ur0t|}|
t| W d    |
S 1 s+w   Y  |
S )Nr   CUDAExecutionProviderzWarning: Please install onnxruntime-gpu package instead of onnxruntime, and use a machine with GPU for testing gpu performance.dmlDmlExecutionProviderCPUExecutionProviderrocmROCMExecutionProvidermigraphx)MIGraphXExecutionProviderr-   r+   cudatensorrt)TensorrtExecutionProviderr(   r+      r'      c   )	providersr/   r2   )onnxruntimeset_default_logger_severityget_available_providersprintSessionOptionslog_severity_levelExecutionModeORT_SEQUENTIALexecution_modeGraphOptimizationLevelORT_ENABLE_ALLgraph_optimization_levelORT_DISABLE_ALLORT_ENABLE_BASICORT_ENABLE_EXTENDEDORT_ENABLE_LAYOUTr   InferenceSessionget_providersopenset_tuning_resultsjsonload)r   r   r   r   rB   r   tuning_results_pathr7   execution_providerssess_optionssessionfr   r   r   create_session>   sx   	










rR   c                 C   s,   t jtjt jtjt jtjt jtji}||  S )N)torchfloat32npfloat16int32int64longlong)
torch_typetype_mapr   r   r   
numpy_type   s   r\   c                    s4    fdd|   D } fdd|  D }||fS )Nc                    "   i | ]\}}|t | qS r   rS   
from_numpyto.0namearraydevicer   r   
<dictcomp>      " z/create_input_output_tensors.<locals>.<dictcomp>c                    r]   r   r^   ra   re   r   r   rg      rh   )items)inputsoutputsrf   input_tensorsoutput_tensorsr   re   r   create_input_output_tensors   s   rn   c              
   C   sx   |   }| D ]\}}|||jjdt|j|j|  q| D ]\}}|	||jjdt|j|j|  q#|S Nr   )

io_bindingri   
bind_inputrf   typer\   dtypeshapedata_ptrbind_output)sessrl   rm   rp   rc   tensorr   r   r   create_io_binding   s(   ry   c                 C   s   g }g }|j r	dnd}t|D ]I\}}| ||}	||	 i }
tt|D ]
}|	| |
|| < q&t||
|\}}t| ||}| | t	
 }| | t	
 | }|| q||fS )Nr0   cpu)r   	enumeraterunappendrangelenrn   ry   run_with_iobindingtimeitdefault_timer)rP   
all_inputsoutput_namestest_settingresultslatency_listrf   _test_case_idrj   resultrk   irl   rm   rp   
start_timelatencyr   r   r   %onnxruntime_inference_with_io_binding   s"   


r   c           
      C   st   t |dkr| |t| g }g }t|D ]\}}t }| ||}t | }	|| ||	 q||fS ro   )r   r|   randomchoicer{   r   r   r}   )
rP   r   r   r   r   r   rj   r   r   r   r   r   r   onnxruntime_inference   s   
r   c                 C   s   |  }dtj|  d}|d|j d|j ddd7 }|d|j d|j d7 }|d	|j	 d
|j
 d7 }|d|j d|j d7 }|d|j d7 }|d|j 7 }|S )Nzmodel=,zgraph_optimization_level=z,intra_op_num_threads=zGraphOptimizationLevel.ORT_ zbatch_size=z,sequence_length=ztest_cases=z,test_times=zuse_gpu=z,use_io_binding=zaverage_sequence_length=zrandom_sequence_length=)get_session_optionsospathbasenamerB   r   replacer   r	   r
   r   r   r   r   r   )r   rP   r   rO   optionr   r   r   	to_string   s   r   c              	   C   s   t | j|j|j|| j|j| jd}dd | D }t| j||}||v r,t	d| d S t	d| g }|j
rMt|jD ]}	t||||\}
}|| q;nt|jD ]}	t|||\}
}|| qRt|d }t|}t|d}t|d}t|d	}t|d
}t|d}|jd|  }|||||||f||< t	dt|dt|d | jrtj| j}tj|r|}|ddd  dt   d}t	d|d|d |  }t!|d}t"#|| W d    n1 sw   Y  t	d| d S d S )N)r   rM   c                 S   s   g | ]}|j qS r   )rc   )rb   outputr   r   r   
<listcomp>  s    z run_one_test.<locals>.<listcomp>zskip duplicated test:zRunning test:  2   K   Z   _   r5   g     @@z,Average latency = {} ms, Throughput = {} QPS.2fz.jsonr3   r   .zWARNING:zexists, will write tozinstead.wzTuning results is saved to)$rR   r   r   r   r#   r   r$   get_outputsr   r:   r   r~   r   r   extendr   rU   rd   
statisticsmean
percentiler   formatr%   r   r   abspathexistsrsplitr   now	timestampget_tuning_resultsrI   rK   dump)model_settingr   perf_resultsr   r   rP   r   keyall_latency_list_ir   r   
latency_msaverage_latency
latency_50
latency_75
latency_90
latency_95
latency_99
throughputoutput_pathold_output_pathtrsrQ   r   r   r   run_one_test   sr   	



$r   c                 C   s,   t jt| ||||fd}|  |  d S )N)targetargs)multiprocessingProcessr   startjoin)r   r   r   r   r   processr   r   r   launch_test=  s   
r   c           	      C   s   |j d urt| ||||j  d S tjdd}tjdd}t||h}tdtd|D ]}||vr5|| q*|jdd |D ]
}t| |||| q>d S )NF)logicalTr3      )reverse)	r   r   psutil	cpu_countlistr~   minr}   sort)	r   r   r   r   r   logical_corescandidate_threadsr   r   r   r   r   run_perf_testsL  s(   

r   c                 C   s|   t | j| j| j| j\}}}td|j d|j d|j  t	|j|j|j|j
|j||||j|j| jd}t| ||| d S )NzGenerating z samples for batch_size=z sequence_length=)r&   )r   r   r    r!   r"   r:   r
   r   r	   r   r   r   r   r   r&   r   )r   r   r   	input_idssegment_ids
input_maskr   r   r   r   run_performanced  s.   
r   c                  C   s  t  } | jddtdd | jdddtddd	 | jd
ddtdd | jddtddd | jdddtddd | jddtg dddd | jddtddd | jdddd d! | jdd" | jd#dtd$g d%d&d' | jd(ddd)d! | jdd* | jd+ddd,d! | jdd- | jd.dtd d/d | jd0d1dtd d2d | jd3dtd d4d | jd5dtd d6d | jd7dtd d8d | jd9d td:d; | jd<d td=d; | jd>d?d@tdAd; | jdBdCdddDd! | jddE | jdFdtd$dGd |  }|S )HNz--modelTzbert onnx model path)requiredrr   helpz-bz--batch_size+zKbatch size of input. Allow one or multiple values in the range of [1, 128].)r   rr   nargsr   z-sz--sequence_lengthz maximum sequence length of inputz	--samplesF
   z!number of samples to be generated)r   rr   defaultr   z-tz--test_timesr   zJnumber of times to run per sample. By default, the value is 1000 / samplesz--opt_level)r   r3   r'   r4   r5   r5   zfonnxruntime optimization level: 0 - disable all, 1 - basic, 2 - extended, 3 - layout, 99 - enable all.)r   rr   choicesr   r   z--seedr4   zPrandom seed. Use the same seed to make sure test data is same in multiple tests.z	--verbose
store_truezprint verbose information)r   actionr   )r   z--log_severityr'   )r   r3   r'   r4      z.0:Verbose, 1:Info, 2:Warning, 3:Error, 4:Fatal)r   rr   r   r   r   z	--use_gpuzuse GPU)r   z--use_io_bindingzuse io_binding)r   z
--providerzExecution provider to usez-nz--intra_op_num_threadsz>=0, set intra_op_num_threadsz--input_ids_namezinput name for input idsz--segment_ids_namezinput name for segment idsz--input_mask_namezinput name for attention maskz--input_tuning_resultsz3tuning results (json) to be loaded before benchmark)r   rr   r   z--output_tuning_resultsz1tuning results (json) to be saved after benchmarkz-az--average_sequence_lengthz)average sequence length excluding paddingz-rz--random_sequence_lengthz3use uniform random instead of fixed sequence length)r   z--mask_typezmmask type: (1: mask index or sequence length, 2: raw 2D mask, 3: key len, cumulated lengths of query and key))argparseArgumentParseradd_argumentr   r   set_defaults
parse_args)parserr   r   r   r   parse_arguments  s  					r   c                  C   s  t  } | jdkrtdtd| j | _| jdkr| j| _t }|	 }t
| j}t|dkr5t|dks9tdt| j| j| j| j| j| j| j| j}|D ])}t|| j| j| j| j| j| j| j| j| j| j| j| j}t d| t!||| qNt"|# ddd	 d
}t$j%&t'| jj(d)| jrdndd&dd t"|D | jt*+ ,d}t-|dddK}	t.j/|	ddd}
d }|D ]6\}}|0d}|d u rg d}|1dd |D  |
2| dd |D }|1dd |D  |
2| qW d    n1 sw   Y  t d| d S )Nr   r3   r      z batch_size not in range [1, 128]ztest settingFc                 S   s   | d S )Nr3   r   )xr   r   r   <lambda>S  s    zmain.<locals>.<lambda>)r   r   zperf_results_{}_B{}_S{}_{}.txtGPUCPU-c                 S   s   g | ]}t |qS r   )r   rb   r   r   r   r   r   Y  s    zmain.<locals>.<listcomp>z%Y%m%d-%H%M%Szw+r   )newline	
)	delimiterlineterminatorr   )zLatency(ms)Latency_P50Latency_P75Latency_P90Latency_P95Latency_P99zThroughput(QPS)c                 S      g | ]	}| d d qS )=r   splitr   r   r   r   r   m      c                 S   s   g | ]}t |d qS )r   )r   r   r   r   r   r   p  s    c                 S   r   )r   r3   r   r   r   r   r   r   q  r   zTest summary is saved to)3r   r   maxr   samplesr   r	   r   Managerdictsetr   r   	Exceptionr   modelr    r!   r"   r#   r$   r%   r&   r   r   r   r   r   r   r   r   r   r:   r   sortedri   r   r   r   r   parentr   r   r   strftimerI   csvwriterr   r   writerow)r   managerr   batch_size_setr   r   r   sorted_resultssummary_filetsv_file
tsv_writerheadersr   perf_resultparamsvaluesr   r   r   main#  s~   




	
	
r  __main__)Nr'   N)%r   r  rK   r   r   r   r   r   dataclassesr   r   pathlibr   numpyrU   r   rS   bert_test_datar   r   r   r   rR   r\   rn   ry   r   r   r   r   r   r   r   r   r  r   __spec__r   r   r   r   <module>   sP   
[
E #T
