o
    ٷi                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlZd dl	Z	d dl
mZ dddddd	d
ddd	ZdddddZdd Zdd ZdWddZdededefddZdedededefd d!Zd"eded#ed$edef
d%d&Z	'dXd#ed(ed)efd*d+Zd,efd-d.Z	'dXd#ed(efd/d0Z	'dXdeded1ed#eded2ed3ed$ed4ed5ed6ed)efd7d8Z		9	'dYdededed:efd;d<Z	'	'dZd#ed(efd=d>Z	'	'dZdeded1ed#eded2ed3ed$ed4ed5ed:ed)efd?d@Z	'	9d[dAedBed#eded2ed3ed$ed4ed5edCedDedEefdFdGZ	'	9	'd\dAedBeded#eded2ed3ed$ed4ed5edCedDedEed)efdHdIZ 	'	9	'd\dAedBed#eded2ed3ed$ed4ed5edCedDed)efdJdKZ!	'	9	'd\dAedBed#eded2ed3ed$ed4ed5edCedDed)efdLdMZ"	9d]ded#edededed2ed3ed$ed4ed5ed)efdNdOZ#dPdQ Z$d]dRdSZ%dTdU Z&e'dVkrd dl(Z(ze&  W dS  e)y   e(j*e+   Y dS w dS )^    N)Pathmeasure_memoryzrunwayml/stable-diffusion-v1-5zstabilityai/stable-diffusion-2z stabilityai/stable-diffusion-2-1z+stabilityai/stable-diffusion-xl-refiner-1.0z/stabilityai/stable-diffusion-3-medium-diffusersz'stabilityai/stable-diffusion-3.5-mediumz&stabilityai/stable-diffusion-3.5-largez black-forest-labs/FLUX.1-schnellzblack-forest-labs/FLUX.1-dev)	1.5z2.02.1zxl-1.0z3.0Mz3.5Mz3.5LzFlux.1SzFlux.1DCUDAExecutionProviderROCMExecutionProviderMIGraphXExecutionProviderTensorrtExecutionProvider)cudarocmmigraphxtensorrtc                  C   s   g d} d}| |fS )N)
z.a photo of an astronaut riding a horse on marsz@cute grey cat with blue eyes, wearing a bowtie, acrylic paintingzia cute magical flying dog, fantasy art drawn by disney concept artists, highly detailed, digital paintingzdan illustration of a house with large barn with many cute flower pots and beautiful blue sky sceneryzgone apple sitting on a table, still life, reflective, full color photograph, centered, close-up productzWbackground texture of stones, masterpiece, artistic, stunning photo, award winner photozSnew international organic style house, tropical surroundings, architecture, 8k, hdrznbeautiful Renaissance Revival Estate, Hobbit-House, detailed painting, warm colors, 8k, trending on Artstationzcblue owl, big green eyes, portrait, intricate metal design, unreal engine, octane render, realisticzldelicate elvish moonstone necklace on a velvet background, symmetrical intricate motifs, leaves, flowers, 8kz*bad composition, ugly, abnormal, malformed )promptsnegative_promptr   r   n/home/ubuntu/.local/lib/python3.10/site-packages/onnxruntime/transformers/models/stable_diffusion/benchmark.pyexample_prompts(   s   r   c                   C   s   dS )N)zwarm upbadr   r   r   r   r   warmup_prompts;   s   r   c                 C   s   t d|| |dS )NT)is_gpufuncmonitor_typestart_memoryr   )r   r   r   r   r   r   measure_gpu_memory?   s   r   
model_name	directorydisable_safety_checkerc           	      C   s   ddl m}m} dd l}|d ur%tj|sJ | }|j|||d}n	|j| d|dd}|	|j
j|_
|jdd |rDd |_d |_|S )Nr   )DDIMSchedulerOnnxStableDiffusionPipeline)providersess_optionsonnxT)revisionr    use_auth_tokendisable)	diffusersr   r   onnxruntimeospathexistsSessionOptionsfrom_pretrainedfrom_config	schedulerconfigset_progress_bar_configsafety_checkerfeature_extractor)	r   r   r    r   r   r   r(   session_optionspiper   r   r   get_ort_pipelineC   s,   r6   enable_torch_compileuse_xformersc                 C   s`  d| v r+ddl m} |j| tjdd}|r)|jjtjd tj|jddd	|_|S d
| v rVddl m	} |j| tjdd}|rT|jjtjd tj|jddd	|_|S ddl m
}m} ddlm}	m}
 |j| |
dd}|jj|	d |r}|  |rt|j|_t|j|_t|j|_td ||jj|_|jdd |rd |_d |_|S )NFLUXr   )FluxPipeline)torch_dtyper   )memory_formatzmax-autotuneT)mode	fullgraphzstable-diffusion-3)StableDiffusion3Pipeline)r   StableDiffusionPipeline)channels_lastfloat16z)Torch compiled unet, vae and text_encoderr%   )r'   r:   r-   torchbfloat16totransformerrA   compiler?   r   r@   rB   unet*enable_xformers_memory_efficient_attentionvaetext_encoderprintr.   r/   r0   r1   r2   r3   )r   r   r7   r8   r:   r5   r?   r   r@   rA   rB   r   r   r   get_torch_pipelinea   s>   rM   engine
batch_sizestepsc                 C   s>   | dd dd}|  d| d| d| |rd S d	 S )
N/zstable-diffusion-sd__b_s _safe)splitreplace)rN   r   rO   rP   r   short_model_namer   r   r   get_image_filename_prefix   s   (r\   Fimage_filename_prefixskip_warmupc                    s8  ddl m} t|sJ t \}} fdd}t|	||}t|	||}|  g }t|D ]K\}}||kr< nBt }|g  |g  dj}t }|| }|| t	d|dd t|D ]\}}|
| d	| d	| d
 qjq2ddlm} d| ||t|t| t|||dS )Nr   )r   c                     s4   rd S t  \} }| g  |g  d d S )Npromptheightwidthnum_inference_stepsr   r   r`   negativerO   ra   r5   r^   rP   rb   r   r   warmup   s   

z run_ort_pipeline.<locals>.warmupr_   Inference took .3f secondsrT   .jpg__version__r(   rN   versionra   rb   rP   rO   batch_countnum_promptsaverage_latencymedian_latencyfirst_run_memory_MBsecond_run_memory_MB)r'   r   
isinstancer   r   	enumeratetimeimagesappendrL   saver(   rn   sumlen
statisticsmedian)r5   rO   r]   ra   rb   rP   rr   rq   r   memory_monitor_typer^   r   r   r   rh   first_run_memorysecond_run_memorylatency_listir`   inference_startrz   inference_endlatencykimageort_versionr   rg   r   run_ort_pipeline   sT   

r   returnc                 C   sF   |s|rd| ind| g| ini }t j r!t jddd|d< |S )Nr   r   )device{   	generator)rC   r   is_available	Generatormanual_seed)r   use_num_images_per_promptis_fluxrO   kwargsr   r   r   get_negative_prompt_kwargs   s   

r   c                    sV  t  \}}dd l}t|j fdd}t|	||}t|	||}|  td g }t|D ]\\}}||kr? nStj	  t

 }t|d }d|g  d|j}tj	  t

 }|| }|| td|dd t|D ]\}}|| d	| d	| d
 q~q5dtj ||t|t| t|||dS )Nr   c                     sB   rd S t  \} }t|d }d| g  d| d S )NFr`   ra   rb   rc   r   r   r   r`   rf   extra_kwargsrO   ra   r   r5   r^   rP   rb   r   r   rh     s
   
"z"run_torch_pipeline.<locals>.warmupFr   ri   rj   rk   rT   rl   rC   ro   r   )r   r'   rw   r:   r   rC   set_grad_enabledrx   r   synchronizery   r   rz   r{   rL   r|   rn   r}   r~   r   r   )r5   rO   r]   ra   rb   rP   rr   rq   r   r   r^   r   r   r'   rh   r   r   r   r   r`   r   r   rz   r   r   r   r   r   r   r   run_torch_pipeline   s\   




r   r    ra   rb   rr   rq   tuningc                 C   s   |}|r|dv r|dddf}t   }t| |||}t   }td||  d td| |||}t||||||||	|
||d}|| ||dd	|d
d |S )N)r   r      )tunable_op_enabletunable_op_tuning_enableModel loading took rk   ortr^   ExecutionProviderrW   Fr   r   r    r   enable_cuda_graph)ry   r6   rL   r\   r   updaterZ   )r   r   r    rO   r   ra   rb   rP   rr   rq   r   r   r   r^   provider_and_options
load_startr5   load_endr]   resultr   r   r   run_ort:  s<   
	r   Tuse_io_bindingc                 C   sb   ddl m} |d urtj|r|j|||d}n|j| d||d}|| |r/d |_d |_|S )Nr   )ORTPipelineForText2Image)r    r   T)exportr    r   )	optimum.onnxruntimer   r)   r*   r+   r-   save_pretrainedr2   r3   )r   r   r    r   r   r   pipeliner   r   r   get_optimum_ort_pipelinen  s   
r   c                    sx  t dt ddlm} t|t \}} f	dd}t|	||}t|	||}|  t|}g }t|D ]\\}}||krK nSt		 }r`d|d|j
}nd|g d|j
}t		 }|| }|| t d|d	d
 t|D ]\}}|| d| d| d qqAddlm} d| |t|t| t|||dS )NzPipeline typer   )ORTFluxPipelinec                     sd   rd S t  \} }t|}r!d|  d| d S d| g d| d S )Nr`   ra   rb   rc   num_images_per_promptr   r   r   r   	rq   rO   ra   r   r5   r^   rP   r   rb   r   r   rh     s   

"	z(run_optimum_ort_pipeline.<locals>.warmupr   r   ri   rj   rk   rT   rl   rm   optimum_ortro   r   )rL   type&optimum.onnxruntime.modeling_diffusionr   rw   r   r   r   rx   ry   rz   r{   r|   r(   rn   r}   r~   r   r   )r5   rO   r]   ra   rb   rP   rr   rq   r   r   r   r^   r   r   r   rh   r   r   r   r   r   r`   r   rz   r   r   r   r   r   r   r   r   run_optimum_ort_pipeline  sl   

	
r   c                 C   s   t   }t| ||||d}t   }td||  d |r&| d t|j n| }td||||}t||||||||	|
||d}|| ||dd|d	d
 |S )Nr   r   rk   rT   optimumr   r   rW   Fr   )	ry   r   rL   r   namer\   r   r   rZ   )r   r   r    rO   r   ra   rb   rP   rr   rq   r   r   r   r^   r   r5   r   full_model_namer]   r   r   r   r   run_optimum_ort  s@   


	r   work_dirrp   max_batch_sizenvtx_profileuse_cuda_graphc           -         sn  t d ddlm} |   |ksJ ddlm} ||}| }ddlm}m} ddl	m
} |j}|| ||\}}}}}||d|d|||||d		jj|||d
 dddtj d    fdd}t|
||	}t|
||	}|  td| |}g }t \} }!t| D ]Q\}"}#|"|kr nHt }$j|#g  |!g  ddd\}%}&t }'|'|$ }(||( t d|(dd|&  t|%D ]\})}*|*| d|" d|) d qq  ddlm}+ ddlm}, i d| ddd|,dd|+ dd |d!d"d#d$ d%|d&|d't|t | d(t!"|d)|d*|d+|d,|S )-Nzd[I] Initializing ORT TensorRT EP accelerated StableDiffusionXL txt2img pipeline (static input shape)r   init_trt_pluginsPipelineInfo
EngineTypeget_engine_pathsr@   DDIMFr/   
output_dirverboser   r   r   framework_model_direngine_type   T)opt_image_heightopt_image_widthopt_batch_sizestatic_batchstatic_image_shapemax_workspace_size	device_idc                     s.   t  \} }j| g  |g  d d S N)denoising_stepsr   runre   rO   ra   r   rP   rb   r   r   rh   \  s   
$z"run_ort_trt_static.<locals>.warmuport_trtg      @r   r   guidanceseedEnd2End took rj    seconds. Inference latency: rT   rl   rm   r   rN   r(   rp   r    z	tensorrt()r   ra   rb   rP   rO   rq   rr   rs   rt   ru   rv   r   r   )#rL   trt_utilitiesr   diffusion_modelsr   
short_nameengine_builderr   r   pipeline_stable_diffusionr@   ORT_TRTbackendbuild_enginesrC   r   current_deviceload_resourcesr   r\   r   rx   ry   r   r{   r|   teardownr   rn   r(   r   r}   r~   r   r   )-r   rp   rO   r   ra   rb   rP   rr   rq   r   r   r   r   r   r   r   pipeline_infor   r   r   r@   r   onnx_dir
engine_dirr   r   rT   rh   r   r   r]   r   r   r   r   r`   r   rz   pipeline_timer   r   r   r   trt_versionr   r   r   r   run_ort_trt_static  s   

	
	

r   c           1         s8  t d ddlm} ddlm} |   |ksJ ddlm} ||}ddlm}m	} ddl
m} |j}|| ||\}}}}}||d|d	||d
|djj|||d d
d
d	|d tj j }||\}}j|    fdd} t|| |
}!t|| |
}"|   td| |}#g }$t \}%}&t|%D ]P\}'}(|'|kr nGt })j|(g  |&g  dd\}*}+t },|,|) }-|$|- t d|-dd|+  t|*D ]\}.}/|/|# d|' d|. d qq  dd l}0d|0jd |	|t|$t |$ t!"|$|!|"|dS )N][I] Initializing TensorRT accelerated StableDiffusionXL txt2img pipeline (static input shape)r   cudartr   r   r   r   r   FT)r/   r   r   r   r   r   r   r   r   r   r   
onnx_opsetr   r   r   r   static_shapeenable_all_tacticstiming_cachec                     s6   rd S t  \} }j| g  |g  d d S r   r   re   rO   ra   r   r^   rP   rb   r   r   rh     s   
$z#run_tensorrt_static.<locals>.warmuptrtr   )r   r   r   rj   r   rT   rl   r   default)rN   rp   r    ra   rb   rP   rO   rq   rr   rs   rt   ru   rv   r   )#rL   r   r   r   r   r   r   r   r   r   r   r@   TRTr   load_enginesmaxmax_device_memory
cudaMallocactivate_enginesr   r   r\   r   rx   ry   r   r{   r|   r   r   rn   r}   r~   r   r   )1r   rp   r   rO   r   ra   rb   rP   rr   rq   r   r   r   r   r   r^   r   r   r   r   r   r   r@   r   r   r   r   r   r  r	  rT   shared_device_memoryrh   r   r   r]   r   r   r   r   r`   r   rz   r   r   r   r   r   r  r   r  r   run_tensorrt_static  s   


r  c           *         sN  t d dd l}ddlm} ddlm} d dks$d dkr/td d d|  ks8J dd	lm} dd
l	m
 m  f	dd}ddlm} ||}|||tj j }||\}}j|  dfdd			
fdd}t|
||	}t|
||	}|  | }td||}g }t \}} t|D ]L\}!}"|!|kr nCt }#	|"g | g dd\}$}%t }&|&|# }'||' t d|'dd|%  t|$D ]\}(})|)| d|! d|( d qq  |d|jd||t|t| t !|||dS )Nr   r   r   r      zCImage height and width have to be divisible by 8 but specified as: z and .r   r   c           	         s\    j }||\}}}}}| |d|d||d	}|jj|||dddd|d |S )Nr   Fr   r   Tr   )r  r   r  )	pipeline_classr   r   r   r   r   r   r  r   )	r   rO   r   ra   r   r   r   rb   r   r   r   init_pipelineJ  s:   z-run_tensorrt_static_xl.<locals>.init_pipeliner   c              	      s   j | | d|dS Ng      @r   r   r`   r   r   )image_heightimage_widthr   rP   r   r   run_sd_xl_inferencez     z3run_tensorrt_static_xl.<locals>.run_sd_xl_inferencec                     ,   rd S t  \} }| g  |g   d S Nrd   re   rO   r  r^   r   r   rh        
z&run_tensorrt_static_xl.<locals>.warmupr  r   r   r   rj   r   rT   .pngr   r  r   rN   rp   r    ra   rb   rP   rO   rq   rr   rs   rt   ru   rv   r   r  )"rL   r   r   r   r   r   
ValueErrorr   r   r   r   r   r   r@   r  r   r	  r
  r  r   r   r   r\   r   rx   ry   r{   r|   r   rn   r}   r~   r   r   )*r   rp   rO   r   ra   rb   rP   rr   rq   r   r   r   r   r   r^   r  r   r   r   r  r@   r   r	  rT   r  rh   r   r   r   r]   r   r   r   r   r`   r   rz   r   r   r   r   r   r   )r   rO   r   ra   r  r  r   r   r   r  r^   rP   r   rb   r   r   run_tensorrt_static_xl#  sx   #


r!  c           %         s  ddl m} ddlm} |||j| || d |ksJ   dfdd	 fdd}t|
||	}t|
||	}|  j }t	d	| |}g }t
 \}}t|D ]S\}}||kri nJt }|g  |g  d
d\}}t }|| }|| td|dd|  t|D ]\} }!| d| d|  d}"|!|" td|" qq_  ddlm}# ddlm}$ |d|$d|# d ||t|t| t||||dS )Nr   )initialize_pipeline)r   )rp   r   r   ra   rb   r   r   r   c              	      s   j | | d|dS r  r  r  )ra   r   rP   rb   r   r   r    r  z+run_ort_trt_xl.<locals>.run_sd_xl_inferencec                     r  r  rd   re   r  r   r   rh     r  zrun_ort_trt_xl.<locals>.warmupr   r   r  r   rj   r   rT   r  zImage saved torm   r(   r   r   r  r  )
demo_utilsr"  r   r   r   r   r   r   r   r\   r   rx   ry   r{   rL   r|   r   r   rn   r(   r}   r~   r   r   )%r   rp   rO   r   ra   rb   rP   rr   rq   r   r   r   r   r   r^   r"  r   rh   r   r   r   r]   r   r   r   r   r`   r   rz   r   r   r   r   r   filenamer   r   r   )rO   ra   r   r  r^   rP   rb   r   run_ort_trt_xl  sp   




r%  c                 C   s   dt jj_dt jj_t d t }t| |||}t }td||  d t	d| |||}|sVt 
  t||||||||	|
||d}W d    n1 sPw   Y  nt||||||||	|
||d}|| d |rmdn|rqdnd	|dd
 |S )NTFr   rk   rC   r   rG   xformersr  r   )rC   backendscudnnenabled	benchmarkr   ry   rM   rL   r\   inference_moder   r   )r   rO   r   r7   r8   ra   rb   rP   rr   rq   r   r   r^   r   r5   r   r]   r   r   r   r   	run_torch!  s^   



	r,  c               	   C   s  t  } | jdddtdg ddd | jdd	dtd
tt dd | jddddd | jdddttt ddd | jdddtd dd | jdddtddd | jddddd  | jdd! | jd"ddd#d  | jdd$ | jd%ddd&d  | jdd' | jd(ddd)d  | jdd* | jd+ddd,d  | jdd- | jd.d/t	d0g d1d2d3 | jd4dt	d5d6d | jd7dt	d5d8d | jd9d:dt	d;d<d | jd=d>dt	d?d@d | jdAdBdt	t
d0dCdDdEd | jdFdGdt	t
d0dHdIdJd | jdKdLdddMd  | jddN |  }|S )ONz-ez--engineFr(   )r(   r   rC   r   z-Engines to benchmark. Default is onnxruntime.)requiredr   r  choiceshelpz-rz
--providerr   z8Provider to benchmark. Default is CUDAExecutionProvider.z-tz--tuning
store_truezsEnable TunableOp and tuning. This will incur longer warmup latency, and is mandatory for some operators of ROCm EP.)actionr/  z-vz	--versionr   z>Stable diffusion version like 1.5, 2.0 or 2.1. Default is 1.5.)r-  r   r.  r  r/  z-pz
--pipelinez[Directory of saved onnx pipeline. It could be the output directory of optimize_pipeline.py.)r-  r   r  r/  z-wz
--work_dirr  z?Root directory to save exported onnx models, built engines etc.z--enable_safety_checkerzEnable safety checker)r-  r1  r/  )enable_safety_checkerz--enable_torch_compilez#Enable compile unet for PyTorch 2.0)r7   z--use_xformerszUse xformers for PyTorch)r8   z--use_io_bindingzUse I/O Binding for Optimum.r   z--skip_warmupz
No warmup.r   z-bz--batch_sizer   )r            r  
          z)Number of images per batch. Default is 1.)r   r  r.  r/  z--heighti   z$Output image height. Default is 512.z--widthz#Output image width. Default is 512.z-sz--steps2   zNumber of steps. Default is 50.z-nz--num_promptsr6  z!Number of prompts. Default is 10.z-cz--batch_count      z(Number of batches to test. Default is 5.z-mz--max_trt_batch_sizer7  r5  zdMaximum batch size for TensorRT. Change the value may trigger TensorRT engine rebuild. Default is 4.z-gz--enable_cuda_graphz/Enable Cuda Graph. Requires onnxruntime >= 1.16)r   )argparseArgumentParseradd_argumentstrlist	PROVIDERSkeys	SD_MODELSset_defaultsintrange
parse_args)parserargsr   r   r   parse_argumentsf  s.  




					

rJ  c                    sL   dd l }|t }| D ] | rt fdddD r#t j qd S )Nr   c                 3   s    | ]}| j v V  qd S r  )r*   ).0xlibr   r   	<genexpr>  s    z)print_loaded_libraries.<locals>.<genexpr>)libculibnvr   )psutilProcessr)   getpidmemory_mapsanyrL   r*   )cuda_related_onlyrR  pr   rM  r   print_loaded_libraries  s   
rY  c                  C   sN  t  } t|  | jdkrU| jdv rdtjd< ddlm} ddlm} |	||	dkr1dtjd	< | j
rU| jdkrC| jd
v rC| jd u sGtd|	||	dk rUtdtjdd | jdkrbdnd}t|d }td| t| j }t| j }| jdkr| jdkrd| jv rtd t| j| j| jd| j| j| j| j| j||| jd| j
| jd}n;td t| j| j| j| j | j| j| j| j| j||| jd| j
| jd}n| jdkr|dkrd| jv rdtjd	< t|| j|| j| j | j| j| j| j| j||| j | jd}n| jdkrC| jrtj!"| jsJ dtd| d| j#  t$|| j|| j| j | j| j| j| j| j||| j#| jd }n| jdkrrd| jv rrtd! t%| j| j| jd| j| j| j| j| j||| jd| j
| jd}nt| jdkrtd" t&d>i d#| jd$| jd%|d&| jd'dd(| jd)| jd*| jd+| jd,| jd-|d.|d/| jd0dd1| j
d2| j}n)td3| j' d4| j( d5 t)|| j| j | j'| j(| j| j| j| j| j||| jd6}t| t*d7d8d9d:}g d;}	t+j,||	d<}
|
-  |
.| W d    n	1 sw   Y  | jd=kr%t/| jd
v  d S d S )?Nr(   )r   1ORT_DISABLE_TRT_FLASH_ATTENTIONr   )rp   rm   z1.16.0!ORT_ENABLE_FUSED_CAUSAL_ATTENTION)r   r   z:The stable diffusion pipeline does not support CUDA graph.z1.16z.CUDA graph requires ONNX Runtime 1.16 or laterz%(funcName)20s: %(message)s)fmtr   r   z&GPU memory used before loading models:r   xlzNTesting Txt2ImgXLPipeline with static input shape. Backend is ORT TensorRT EP.TF)r   rp   rO   r   ra   rb   rP   rr   rq   r   r   r   r   r   r^   zLTesting Txt2ImgPipeline with static input shape. Backend is ORT TensorRT EP.r   r   )r   r   r    rO   r   ra   rb   rP   rr   rq   r   r   r   r^   z?--pipeline should be specified for the directory of ONNX modelsz/Testing diffusers StableDiffusionPipeline with z provider and tuning=)r   r   r    rO   r   ra   rb   rP   rr   rq   r   r   r   r^   zGTesting Txt2ImgXLPipeline with static input shape. Backend is TensorRT.zETesting Txt2ImgPipeline with static input shape. Backend is TensorRT.r   rp   r   rO   r   ra   rb   rP   rr   rq   r   r   r   r   r   r^   zNTesting Txt2ImgPipeline with dynamic input shape. Backend is PyTorch: compile=z, xformers=r  )r   rO   r   r7   r8   ra   rb   rP   rr   rq   r   r   r^   zbenchmark_result.csvarW   )r=   newline)r   r   rN   rp   r    r   ra   rb   rP   rO   rq   rr   rs   rt   ru   rv   r   )
fieldnamesr   r   )0rJ  rL   rN   rp   r)   environ	packagingr(   rn   parser   r    r   r   coloredlogsinstallr   rC  rA  r%  r   rO   ra   rb   rP   rr   rq   max_trt_batch_sizer^   r   r2  r   r   r*   isdirr   r   r!  r  r7   r8   r,  opencsv
DictWriterwriteheaderwriterowrY  )rI  rp   r   r   r   sd_modelr    r   csv_filecolumn_names
csv_writerr   r   r   main  sh  












	
rr  __main__r  )F)r   TF)FF)FT)FTF)T),r<  rj  r)   r   sysry   pathlibr   re  rC   benchmark_helperr   rC  rA  r   r   r   r?  boolr6   rM   rE  r\   r   dictr   r   r   r   r   r   r   r  r!  r%  r,  rJ  rY  rr  __name__	traceback	Exceptionprint_exceptionexc_infor   r   r   r   <module>   s"  
-
K
W	

7
&
g	

B	
 	

 	
 $	
u	

E 
1	 
N