o
    i                     @   s   d dl Z d dlZd dlZd dlZd dlZeddZ						dddZdad	d
 Z	dd Z
dd Zdd Zdd ZdddZdd Zdd ZdS )    NTORCHAO_AUTOTUNER_DATA_PATH   d   Tmeanc                    s  |dv sJ dd l  	 |    j  |r! jtd jdd}n jtd jdd} jjdd} jjdd}	|  td	D ]	}
|	  |   qB|	   j  |
|	d	 }td
t|| }td
t|| } fddt|D } fddt|D }	t|D ]}
|   qt|D ]!}|d ur|D ]}d |_q|	  ||   |   |	|   q j   jdd t||	D  jd}|d ur | j| jd }t|d
kr|d }|S t || S )N)minmaxr   medianr       Acudadtypedeviceg    ATenable_timing      c                       g | ]	} j jd dqS Tr   r
   Event.0itorch L/home/ubuntu/.local/lib/python3.10/site-packages/torchao/kernel/autotuner.py
<listcomp>G       z#do_bench_triton.<locals>.<listcomp>c                    r   r   r   r   r   r   r   r   H   r   c                 S   s   g | ]	\}}| |qS r   )elapsed_time)r   ser   r   r   r   ]   r   )r   )r   r
   synchronizeemptyintint8r   recordrangezero_r   r   gradtensorzipfloatquantiletolistlengetattritem)fnwarmuprepgrad_to_none	quantiles
fast_flushreturn_modecachestart_event	end_event_estimate_msn_warmupn_repeatr   xtimesretr   r   r   do_bench_triton   sR   	


rC   c                 C   s   t j }td u rtj d }ntt}td| d|  t	|d}dd l
}td|  || | W d    d S 1 sDw   Y  d S )Nzdata.pklzTrying to store configs for z locally under wbr   zSaving best configs to file )r   r
   get_device_nameAUTOTUNER_DATA_PATHpathlibPathcwdlogginginfoopenpickledump)best_configsdevice_namesaved_configsfrM   r   r   r   _save_best_configsj   s   

"rS   c                  C   s   t j } dd l}td u r&|jd}|d d d }| ds%t	d nt
t}t	d|  d	|  | rbdd l}t|d
}t	d|  ||W  d    S 1 s[w   Y  d S d S )Nr   torchaokernelconfigszdata_a100.pklzNVIDIA A100z6Warning! Loaded configurations are optimized for A100!zTrying to load configs for z from rbzLoading best configs from file )r   r
   rE   	importlibrF   	resourcesfiles
startswithrJ   rK   rG   rH   is_filerM   rL   load)rP   rX   rQ   rM   rR   r   r   r   _load_best_configsz   s"   



$r^   c                 C   s$   t | r| j|  |  fS | fS N)r   	is_tensorr   sizestride)ar   r   r   get_arg_key   s   
rd   c                 C   s   t tdd | D dS )Nc                 s   s    | ]}t |V  qd S r_   )rd   )r   rc   r   r   r   	<genexpr>   s    zget_args_key.<locals>.<genexpr>r   )sumtuple)argsr   r   r   get_args_key   s   ri   c                 C   s   |   t j  t jtdt jdd}|  t jjdd}t jjdd}|  t|D ]}|   q-|  t j  |	|| }|S )Nr	   r
   r   Tr   )
r   r
   r"   r#   r$   r(   r   r&   r'   r   )r2   r4   r9   r:   r;   r<   r=   r   r   r   do_bench_basic   s   

rj   c                    s    fdd}zt |d}W n ty   d }Y n tjjy&   d }Y nw |d u s5|d ur9||d kr9tdS t |d}|d urL||d krLtdS t|S )Nc                      s    g  S r_   r   r   rh   configr2   r   r   
wrapped_fn   s   zdo_bench.<locals>.wrapped_fnr   r   infr   
   )rj   RuntimeErrortritonruntimeOutOfResourcesr,   rC   )r2   rh   rl   	best_timerm   timer   rk   r   do_bench   s   
rv   c                 C   s   | t v r
t |  d S d S )Nr   )BEST_CONFIGS)keyr   r   r   get_best_config_by_key   s   ry   c           	   	   C   s4  t d u rt a t d u ri a t|dkrd S t|}t|}|d ur#|S td| d |d }t| ||d }tdt	t
|||g d}|dd  D ]-}t| |||}td|ddt|d|dt
|g ||k rw|}|}|d7 }qN||ft |< td	 tdt	t
||g tt  |S )
Nr   z2Starting autotune search. No config found for key . r   4d/z6.3fz-- perfetto --)rw   r^   r/   ri   ry   rJ   rK   rv   joinmapstrrS   )	r2   rh   rV   rx   best_configrt   r   rl   ru   r   r   r   get_best_config_fn   s:   (

r   )r   r   NNTr   r_   )rJ   osrG   r   rq   getenvrF   rC   rw   rS   r^   rd   ri   rj   rv   ry   r   r   r   r   r   <module>   s,   
W
