o
    پi                     @   s|   d dl Z d dlZd dlZ		ddededefdd	ZG d
d dZG dd dZ				ddededededef
ddZ	dS )    N   
   Fnum_warmups	num_testshigh_precisionc                 C   s   t j  t jtdt jdd}|  t|D ]}|   q|r6t jdt jdd}t jdt jdd}||  t jj	dd}t jj	dd}	|
  t|D ]}
|   qL|	
  t j  ||	| d S )Ng    Acudadtypedevice)    r   T)enable_timing     @@)torchr   synchronizeemptyintzero_rangerandnfloatEventrecordelapsed_time)fnr   r   r   cache_xystart_event	end_eventi r!   K/home/ubuntu/.local/lib/python3.10/site-packages/deep_gemm/testing/bench.pybench   s"   

r#   c                   @      e Zd Zdd Zdd ZdS )empty_suppressc                 C   s   | S Nr!   selfr!   r!   r"   	__enter__$      zempty_suppress.__enter__c                 G   s   d S r&   r!   r(   r   r!   r!   r"   __exit__'   r*   zempty_suppress.__exit__N__name__
__module____qualname__r)   r,   r!   r!   r!   r"   r%   #   s    r%   c                   @   r$   )suppress_stdout_stderrc                 C   s   t tjd| _t tjd| _tj | _tj	 | _
ttj | _ttj	 | _tj| _tj	| _t| j | j t| j | j
 | jt_| jt_	| S )Nw)openosdevnulloutnull_fileerrnull_filesysstdoutfilenoold_stdout_fileno_undupstderrold_stderr_fileno_undupdupold_stdout_filenoold_stderr_fileno
old_stdout
old_stderrdup2r'   r!   r!   r"   r)   ,   s   z suppress_stdout_stderr.__enter__c                 G   s`   | j t_| jt_t| j| j t| j	| j
 t| j t| j	 | j  | j  d S r&   )rA   r8   r9   rB   r<   r4   rC   r?   r;   r@   r=   closer6   r7   r+   r!   r!   r"   r,   @   s   
zsuppress_stdout_stderr.__exit__Nr-   r!   r!   r!   r"   r1   +   s    r1      Tsuppress_kineto_output
trace_pathflush_l2with_multiple_kernelsc              	      s\  t |tst |tsJ t |t}ttjddr$|r"dt| S dS td}|   |r/tnt	}	|	 R t
jjddddd}
t
jjt
jjjg|
d}|* tdD ]}t|D ]}|rgt
j|t
jd	d
  |   qX|  qRW d    n1 szw   Y  W d    n1 sw   Y  | jdddd}t |tr|fn|}|s|D ] t fdd|D dksJ d  dq|d ur|| ddd}g }|D ]P d}d}|D ]9} |v r| d }| d }| D ]!\}}||v r|t||d| t| 7 }|t|7 } nqq||dkr|| nd q|r*t|S |d S )NDG_USE_NVIDIA_TOOLSr   )   rK   g    eA)waitwarmupactiverepeat)
activitiesschedule   r   r   cuda_time_totald   )sort_bymax_name_column_width
c                    s   g | ]} |v qS r!   r!   ).0linenamer!   r"   
<listcomp>r   s    z bench_kineto.<locals>.<listcomp>zErrors of the kernel z in the profiling tabler   g    .A)msus )
isinstancestrtupler   r4   environgetlenr1   r%   r   profilerrQ   profileProfilerActivityCUDAr   r   r   stepkey_averagestablesplitsumexport_chrome_traceitemsr   replaceappend)r   kernel_namesr   rF   rG   rH   rI   is_tupleflush_l2_sizesuppressrQ   rh   r    r   
prof_linesunitskernel_times
total_time	total_numrY   time_strnum_strunitscaler!   rZ   r"   bench_kinetoN   s\   

,



 r   )r   r   F)rE   FNTF)
r4   r8   r   r   boolr#   r%   r1   rc   r   r!   r!   r!   r"   <module>   s.    
#