o
     i_                     @   sj  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlmZmZ d dlmZ d dlmZmZmZmZmZmZmZ d dlmZ d dlZd dlZd dlZ d dl!Z!d dl"Z"d dl#m$Z$ e %  edddgZ&G d	d
 d
e'Z(e!j)* Z+e+rzd dl,Z,W n e-y Z. ze/de. d dZ+W Y dZ.[.ndZ.[.ww dd Z0dSddZ1	dTde2fddZ3e+r	dUdee& fddZ4dVde2fddZ5de2ddfddZ6ej7d e8defd!d"Z9d#Z:g d$Z;de2deeee2ef ef  fd%d&Z<de2d'eeee2ef ef  ddfd(d)Z=d'eeee2ef ef  dee fd*d+Z>d'ee d,e2ddfd-d.Z?de j@fd/d0ZA	dWd1eee2ef  ddfd2d3ZB				4dXd5d6d7d8d1eee2ef  d9ee2 d:eCd;eCd<eCd=e2d>eDd?eDd@eDddfdAdBZEdCdD ZFd'ee dEee d?eDd@eDddf
dFdGZG			HdYde2dIeCdJeCdKeCddf
dLdMZHdNdO ZIe!jJdPe!jKdQe!jLdRiZMdS )Z    N)defaultdict
namedtuple)replace)AnyDict	GeneratorIteratorListSetTuple)	benchmarkTestCasefunctionnamec                   @   s   e Zd ZdS )NotSupportedInputErrorN)__name__
__module____qualname__ r   r   M/home/ubuntu/.local/lib/python3.10/site-packages/xformers/benchmarks/utils.pyr   "   s    r   zTriton is not available: z.
bench_functionsFc                 C   s   t | tjr
| jjS | jS N)
isinstance	functoolspartialfuncr   )fnr   r   r   get_func_name/   s   r   returnc           	      C   s   t | d|}t d| d ddd |  D   t|}t dd| dd	d |  D   d
d |  D }|  D ]}| D ]}|| ||  qIqC| D ]\}}t dj||dddd |D   qZt d dS )zQPrintout the contents of a dict as a human-readable and Markdown compatible arrayz Units: {:<45}z| | c                 s       | ]}d  |V  qdS )z{0:<20}|Nformat).0kr   r   r   	<genexpr>9       zpretty_print.<locals>.<genexpr>z|-{}|-c                 s   s    | ]}d  dV  qdS )z{}|z--------------------Nr!   r#   _r   r   r   r%   >   r&   c                 S       i | ]}|  D ]}|g qqS r   keysr#   vr$   r   r   r   
<dictcomp>A        z pretty_print.<locals>.<dictcomp>z| {0:<{offset}}|)offsetc                 s   r    )z{:<20}|Nr!   )r#   r.   r   r   r   r%   I   r&   N)printr"   joinr,   lenvaluesappenditems)	resultstitleunitsheaderr1   	workloadsr.   r$   wr   r   r   pretty_print5   s*   
(r>   r   lower rightr:   c           
      C   s,  |s|d }| dd dd dd dd}dd	 |  D }|  D ]}| D ]}|| t||  q+q%t }	|	d
 |	d
 |	 D ] \}}|rb||v rbt
t|  |d qLt
t|  | qLt| tjt| |d t| tjdd tj|dd t|	 dS )Graph out the contents of a dict.
    Dash key means that if the result label has this key, then it will be displayed with a dash
    .png r)   /r'   :r   c                 S   r*   r   r+   r-   r   r   r   r/   _   r0   zpretty_plot.<locals>.<dictcomp>   z--loc-   rotationtightbbox_inchesN)r   r5   r,   r6   floatpltfigureset_figwidthset_figheightr7   plotlistr9   legendylabelxtickssavefigclose)
r8   r9   r:   filenamedash_key
legend_locr<   r.   r$   fr   r   r   pretty_plotO   s,   "



r^   
test_casesc              
      s   t d}t jt jt jfD ]`}i }|D ]C\}}	}
t j||	|
||dd | D ]0tj fddd }| |}d| d|	 d	|
 }||vrLi ||< |d
|| j	< q%qt
|d||d t||t| |dd qd S )NcudaT)devicedtyperequires_gradc                      s
     S r   )r   r   atestcaser   r   <lambda>   s   
 z!bench_functions.<locals>.<lambda>r   zB=z, M=z, K=z.1fz& ------------- Type: {} ------------- )r9   r:   pytorch)r[   )torchra   bfloat16float16float32randtritontestingdo_benchr   r>   r"   r^   str)r_   shapesmetric_transformunitr9   ra   rb   r8   BMKtimemetrickeyr   rd   r   bench_functions{   s(   

r{   c                 C   s  |s|d }| dd dd dd dd}t|  }dd	 |  D }|  D ]}| D ]}|| t||  q1q+t| }	t|	}
| D ]}t|| } |
d
 }t }|	d |
d t|
D ]}|	| }|| }td
| || |}tj||d
dd qmt| tjt| dd t| t }td
|
d
 d  || |}||| tjdd tj|j dd |d |jjddd |jjddd tj|dd t| dS )r@   rA   rB   r)   rC   r'   rD   r   c                 S   r*   r   r+   r-   r   r   r   r/      r0   z"pretty_barplot.<locals>.<dictcomp>   rE   black)width	edgecolorzupper rightrF          @rH   rI   right)haTgraydashed)color	linestylerK   rL   N)r   rT   r,   r5   r6   rN   r4   rO   rP   rQ   rR   rangenparangebarr9   rU   rV   gca
set_xticksrW   setpxaxisget_majorticklabelsset_axisbelowyaxisgridrX   rY   )r8   r9   r:   rZ   r[   xlabelsr<   r.   r$   options	group_lenrz   
num_groupsgroup_widthr]   idxoptionr5   xlocax
xticks_locr   r   r   pretty_barplot   sN   "




r   rZ   c                 C   s&   zt |  W dS  ty   Y dS w )zRemove a file like rm -f.N)osremoveFileNotFoundError)rZ   r   r   r   rmf   s
   r   numc                 c   s4    dd t | D }t|V  |D ]}t| qdS )z:A context to get tempfiles and ensure they are cleaned up.c                 S   s   g | ]}t  d  qS )r|   )tempfilemkstempr(   r   r   r   
<listcomp>   s    z"temp_files_ctx.<locals>.<listcomp>N)r   tupler   )r   filesr   r   r   r   temp_files_ctx   s   

r   	algorithm)eagervanillarh   c           
      C   s"  t j| d}d}d}t|dkr|d }|d }g }t| de}t|}|D ]T}|dkr:|d tvr:||d< t	j
jjddd|d |d	 |d |t|d
 d}t	j
jjdt|d d g|d}	t|d |	_|t|d dkrx|d nd i|	f q*W d    |S 1 sw   Y  |S )N.r      r|   r   rdescriptionlabel	sub_labelnum_threads)stmtsetupglobal_setupr   r   r   envr   
runtime_usg    .A)number_per_run	raw_times	task_spec
mem_use_mbr   )r   pathbasenamesplitr4   opencsv
DictReaderBASELINE_DESCRIPTIONSr   utilscommonTaskSpecintMeasurementrN   mem_user6   META_ALGORITHM)
rZ   partsr   r   datacsvfilereaderrowr   measurementr   r   r   _benchmark_results_from_csv   sP   



r   r8   c                 C   sx   dd |D }t | ddd$}tj|t|d  d}|  |D ]}|| q"W d    d S 1 s5w   Y  d S )Nc              
   S   sX   g | ](\}}|j j|j j|j j|td |j jtv r|j jnd td|j	 |j
dqS )r   i@B )r   r   r   r   r   r   r   )r   r   r   r   getr   r   r   r   meanr   )r#   metadatar   r   r   r   r     s    
z-_benchmark_results_to_csv.<locals>.<listcomp>zw+r   )newliner   )
fieldnames)r   r   
DictWriterrT   r,   writeheaderwriterow)rZ   r8   r   r   writerdr   r   r   _benchmark_results_to_csv  s   "r   c           
      C   s   t  }t  }| D ]\}}|td}|dur|| ||jj qt|dk}t|dk}g }| D ]@\}}|td}|du rG|| q3t|}d}	|rT|jj}	|rf|r\|	d7 }	|	|7 }	|rf|	d7 }	t	|j|	d|_|| q3|S )z
    Returns a `benchmark.Compare` object, except that if we have runs
    with different algorithms, we also add the algorithm name
    in the column titles
    Nr|   r   []r   )
setr   r   addr   r   r4   r6   copyr   )
r8   all_algorithmsall_descriptionr   r   algodisplay_algodisplay_descrdisplay_resultsr   r   r   r   _finalize_results2  s6   

r   store_results_folderc                    s  | sd S t t}t t}g  | D ]4}|jj vr.|jjtv r' d|jj n |jj |j||jj |jj< |j	||jj |jj< qg }g }|
 D ]C\}||  d tjdkrp||gdgt    n||gfdd D   ||g fdd D   qM d dkrd d< n	 d  d d< |dd	f|d
dffD ]/\}}	}
tj|dg  d}|jddd|
d t  tj||	}t| td|  qd S )Nr   c                    s   g | ]
} |d   qS r   )r   r#   r   )denommemory_valuesr   r   r   o  s    z$_render_bar_plot.<locals>.<listcomp>c                    s*   g | ]}  d  d  |tj qS r   )r   mathinfr   )all_descriptionsruntime_valuesr   r   r   s  s    r   baselinez (baseline)zmem.pngz+Memory usage (vs baseline, lower is better)zruntime.pngz/Runtime speedup (vs baseline, higher is better)Configuration)columnsr   F)xkindstackedr9   zSaved plot: )r   dictr   r   r   insertr6   r   r   r   r7   r   r   r   r4   pd	DataFramerS   rO   tight_layoutr   r   r3   rX   r2   )r8   r   runtimememory_usager   all_data_memall_data_runrz   r   rZ   r9   dffilename_fullr   )r   r   r   r   r   _render_bar_plotW  sd   

r  c                  C   sr   t  } | jddtdd | jddtdd | jddd	d
 | jddtdd | jdddd
 | jdddd
 | S )z%
    Create CLI argument parser.
    z--fnNzOnly benchmark this function)defaulttypehelpz--labelzStore results to a filez--fail_if_regression
store_truez6Enabled in CI to check against performance regressions)actionr  z	--comparez8Compare to previously stored benchmarks (coma separated)z--omit-baselinesz+Do not run the (potentially slow) baselinesz--quietz*Skip intermediate results and progress bar)argparseArgumentParseradd_argumentrq   )parserr   r   r   create_argparser  s:   r  casesc              
   K   s   |pt  }| }|jdur!|jt| kr!tdt|  d dS td| ||jdu r,dn|j|j|jdur;|j	dng |j
|jd| dS )z
    Helper function to run benchmarks.
    Supports loading previous results for comparison, and saving current results to file.
    NzSkipping benchmark ""	optimized,)benchmark_fnr  optimized_labelfail_if_regressioncomparequietomit_baselinesr   )r  
parse_argsr   r   r2   benchmark_run_and_comparer   r  r  r   r  r  )r  r  
arg_parserkwargsargsr   r   r   benchmark_main_helper  s    

r  r  r   giUMu>g?)min_run_timeatol_srtolr  r  r  r  r  r  r  r   c          #         s  d}
g }g  t jt jt jdt jddddt| }ztj	tj
 ddd	dd
ddd}W n ttfyG   d}Y nw d
|vsTJ d| dd
|vs`J d| dt j|dd t }|D ]Q}d
|v rt|n| d}tt j|| dD ]6}t|}|D ])\}}|td ur|t dd |t< |jj|kr|
r||jj|jjf q||7 }qql|stj|dd}|}|D ]O}|rtt| n|dt| d z	| d$i |}W n) ty   Y q ty } zt|s |s
|d W Y d }~qd }~ww d }ztj  tj   tj! d }|D ]}t"j#}zzj|j$j%t&v}i }|rL|j$j%|t< t|j$|d|_$n|sZ|j$j|j$jf|v r_W W ~q+tj  t|j$|d|_$|j'|d}tj   (||f |jj%}tj! d | }||_)tj   tj! d }W n! ty } zt|s |s|d W Y d }~nd }~ww W ~n~w |s|| d| d q+W n! ty } zt|s |s|d W Y d }~nd }~ww |d ur|s fd d!} |tt*+t,t-t.|  t-t.| |  qt, | }!t*+|!  t/|!|  rO|d urOt j|| d
| d}"t0|"  td"|"  |r\t1 |||	d# d S d S )%NTXFORMERS_BENCHMARKS_CACHE~z.cachexformers
benchmarksrB   r)   r'   r   rC   cpuzlabel=`z` should not contain dotszenv=`)exist_okz.*z.csv@r   F)leavez====== z ======zSkipped (OOM)i   r   )r   )r  z: memory used: z MBc                    s8   | d j j d d j jko| d j j d d j jkS )Nr|   )r   r   r   r   r8   r   r   matches_current`  s   z2benchmark_run_and_compare.<locals>.matches_currentzSaved results to )	referencer  r   r   )2r   r   
expanduserr3   environr   r   ri   r`   get_device_namecurrent_devicer   RuntimeErrorAssertionErrormakedirsr   globr   r   	partitionr   r   r   r   r   tqdmr2   rq   writeNotImplementedError_is_oom_errorsynchronizereset_peak_memory_statsmax_memory_allocatedr   r   
_task_specr   r   blocked_autoranger6   r   r   Comparer   rT   filterr  r   _fail_if_regressions)#r  r  r  r  r  r  r  r  r  r   "SKIP_VANILLA_TASKS_IF_ALREADY_DONEresults_compare_tor   r   skip_vanilla_taskscmp_namename_with_envrZ   loadedmr   pbarcasebenchmarks_generatorer   	mem_beginbenchmark_objectmemoryis_optimizedr   r   r,  results_for_printwrite_to_pathr   r+  r   r    s6  














2




r  c                 C   s   t | tjjtjjjfS r   )r   ri   r`   OutOfMemoryErrorrn   r   	autotunerOutOfResources)rM  r   r   r   r:    s   r:  r-  c                 C   s  dd }i }| D ]
}|d |||< qd}d}d}	d}
t  }|D ]n}|d jjtv r+q ||}||v r:td| || ||vrH|
d7 }
q || }t|d j|j ||d j  |kr|j|d jk }|rn|d7 }n|d7 }|rvdnd}t||d|d j d	|j  q |	d7 }	q td
 td|  td|	  td|  |
dkrtd|
  ||	 | }|dkrt	d|
|krt	d|dkrt	dd S )Nc                 S   s:   | d  tddd | d jj| d jj| d jjfS )Nr   r   r'  r|   )r   r   r6  r   r   r   r   r*  r   r   r   get_measurement_id  s
   


z0_fail_if_regressions.<locals>.get_measurement_idr|   r   z%Duplicate benchmark in reference for IMPROVEDzREGRESS zref=znow=zRegression test summary:z  Better   : z  No change: z  Worse    : z  (no ref) : z!At least one benchmark regressed!zNo reference foundzNo benchmark was run)
r   r   r   r   
ValueErrorr   absr   r2   r2  )r8   r-  r  r   rW  id_to_resultr   
num_better	num_worsenum_nochangenum_unkreference_setrefbenchmark_idresis_now_bettercmpbenchmarks_runr   r   r   rB    sT   
&
$
rB  Tfwbw
cuda_graphc                    sD   s sJ dt tj f fdd}| |_t|fi | d S )Nr   c               	   ;   s      D ]m\}}z|d
i | di W n	 ty   Y qw  j}|r(dnd7 }|r0dnd7 } fdd}rd|  tj tj |  W d    n1 sYw   Y  fdd}tjdd|i|| j	d	V  qd S )Nrh  rg  r   c                      s    r    r   d S d S r   )rg  rh  r   )rO  rh  rg  r   r   run_one  s
   z<benchmark_main_helper2.<locals>.handle_case.<locals>.run_onec                      s       d S r   )replayr   )gr   r   rj    s   zfn()r   )r   globalsr   r   r   r   )
r7   r   r   ri   r`   	CUDAGraphgraphr   Timerr   )rK  r$   benchmark_clsr   rj  rh  ri  	functionsrg  )rO  rl  r   handle_case  s6   

z+benchmark_main_helper2.<locals>.handle_case)r   r   rp  r   r  )r   rs  rg  rh  ri  r  rt  r   rr  r   benchmark_main_helper2  s   #ru  c                  k   s6    |   }|  }tj| D ]
}tt||V  qd S r   )r,   r5   	itertoolsproductr   zip)r  r,   valsinstancer   r   r   product_dict  s   r{  b16f16f32)r   N)Nr   r?   )r   )Nr   r   )FFFr  )FFT)Nr	  
contextlibr   r   r   r5  rv  loggingr   r   r   collectionsr   r   dataclassesr   typingr   r   r   r   r	   r
   r   matplotlib.pyplotpyplotrO   numpyr   pandasr   seabornsnsri   r7  torch.utilsr   r   r   	Exceptionr   r`   is_available_triton_is_availablern   ImportErrorrM  warningr   r>   rq   r^   r{   r   r   contextmanagerr   r   r   r   r   r   r   r  r
  r  r  boolrN   r  r:  rB  ru  r{  rj   halfrl   	DTYPE2STRr   r   r   r   <module>   s   $


*
:&+
*%<$
		

 5
=
1