o
    پi4                     @  s  d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	m
Z
 d dlmZmZ d dlmZmZmZmZmZ d dlZd dlmZ d dlZddlmZ d	ZeZd
d Zdd ZG dd dejjjZdd Z G dd dZ!G dd dZ"	dddZ#dS )    )annotationsN)Path)cached_propertypartial)DictTupleListOptionalAny)Tensor   )__version__quackc                   C  s   t t  dt S )N_HOME)osgetenvPACKAGE_NAMEupperr   home r   r   C/home/ubuntu/.local/lib/python3.10/site-packages/quack/autotuner.pyget_home_dir      r   c                   C  s   t jt dt dS )N.cache)r   pathjoinr   r   r   r   r   r   default_cache_dir    s   r   c                      s   e Zd Z fddZ  ZS )FileCacheManagerc                   sv   t  | tt  dd pt | _| jr7tj	
| j| j| _tj	
| jd| _tj| jdd d S td)N
_CACHE_DIR lockT)exist_okz$Could not create or locate cache dir)super__init__r   r   r   r   stripr   	cache_dirr   r   key	lock_pathmakedirsRuntimeError)selfr'   	__class__r   r   r$   %   s   zFileCacheManager.__init__)__name__
__module____qualname__r$   __classcell__r   r   r,   r   r   $   s    r   c                 C  s   t t| ddS )Nutf-8=)base64	b32encodebytesfromhexdecoderstripr'   r   r   r   _base322   s   r;   c                   @  sV   e Zd Z				ddddZedd Zd	d
 Zejj	dd Z
dd ZdddZdS )	AutotunerNFprune_configs_byOptional[Dict]c                   s  |st  g _n| _t|}| _i  _t|j  _|p,t	
t  dddk _g  _|dur:t| _t jdkrK fdd}	|	 _nd _t jdkr_ fdd}
|
 _nd _d _d	 _d _|r|d
 j _|d j _|d j _| _| _dS )a  
        :param prune_configs_by: a dict of functions that are used to prune configs, fields:
            'perf_model': performance model used to predicate running time with different configs, returns running time
            'top_k': number of configs to bench
            'prune_num_stages_by'(optional): a function used to prune num_stages. It takes configs:List[Config] as its input, and returns pruned configs.
        _CACHE_AUTOTUNINGN1r   c                   s    fddj D _d S )Nc                   s   i | ]	}| |   qS r   )clone).0namekwargsr   r   
<dictcomp>[       z9Autotuner.__init__.<locals>._pre_hook.<locals>.<dictcomp>)restore_valuerestore_copiesrD   r+   rD   r   	_pre_hookZ   r   z%Autotuner.__init__.<locals>._pre_hookc                   s*    j D ]}| |  j|  qi  _d S N)rH   copy_rI   )rE   	exceptionrC   rJ   r   r   
_post_hookc   s   

z&Autotuner.__init__.<locals>._post_hook      ?
perf_modeltop_kearly_config_prune)AutotuneConfigconfigsinspect	signaturekeysr   list
parameters	arg_namesr   r   r   r   cache_resultsrH   lenpre_hook	post_hookrQ   configs_top_krS   getfn	_do_bench)r+   rb   r'   rU   rH   r=   do_benchr\   rW   rK   rO   r   rJ   r   r$   8   s>   


zAutotuner.__init__c                 C  s"   | j d u rttjjdddS | j S )N      )warmuprep)rc   r   tritontestingrd   rJ   r   r   r   rd   y   s   
zAutotuner.do_benchc             
     s   t jt  dd dk}|rtdjj d|  | |j	 @ }|r2t
dd| dt|fi | i j fdd	}zj|d
dW S  tyz } z|retd|  tdtdtdgW  Y d }~S d }~ww )N_PRINT_AUTOTUNINGr@   zAutotuning kernel z with config zConflicting meta-parameters: , z8. Make sure that you don't re-define auto-tuned symbols.c               
     s   j d ur
  zjj i  W n ty3 }  zzjd ur,j| d W  W   d } ~ ww jd urBjd d d S d S )N)rN   )r^   rb   __call__	Exceptionr_   )eargscurrent
full_nargsr+   r   r   kernel_call   s(   




z%Autotuner._bench.<locals>.kernel_call)g      ?g?g?)	quantileszAutotuning failed with inf)r   environra   r   r   printrb   r.   rX   rE   
ValueErrorr   dict
all_kwargsnargsrd   rn   float)r+   configrq   metaverbose	conflictsrt   ro   r   rp   r   _bench   s&   "zAutotuner._benchc                   sj  |s|  d S | j }dd |D }t|tt|ksJ dtt|g| }td|d	 }t
t|}|jd d  d}||}	|	rtjt  dd	sd
d t||D  t|	d*}
t|
d } fdd|D }tj||jd| j|< || _d| _W d    d S 1 sw   Y  d S |  |jt|dd | j D d|d	d d S )Nc                 S  s   g | ]}t |qS r   str)rB   cr   r   r   
<listcomp>   s    z.Autotuner.check_disk_cache.<locals>.<listcomp>zConfig strings must be unique-r2      z.autotune.json_FORCE_CACHE_UPDATEFc                 S  s   i | ]\}}||qS r   r   )rB   sr   r   r   r   rF      s    z.Autotuner.check_disk_cache.<locals>.<dictcomp>rconfigs_timingsc                   s   i | ]	\}} | |qS r   r   )rB   r~   timing
str2configr   r   rF      rG   r:   r   c                 S  s   g | ]
\}}t ||fqS r   r   )rB   r~   timingsr   r   r   r      s    )r'   r   )binary)rb   r]   setVERSIONr   hashlibsha256r   encode	hexdigestr   r;   r.   get_filer   rw   ra   r   r   zipopenjsonloadbuiltinsminr   r   
bench_timeputdumpsitems)r+   
tuning_keyrU   bench_fnrb   config_str_list	cache_keyr   	file_namer   cached_configsr   r   r   r   check_disk_cache   sF   


zAutotuner.check_disk_cachec           
   	     s  t tj_d}tjdkri j}fdd| D   fddjD   D ]'\}}t|t	rY
t|j 
tdd | D  
t|j q2tjvrd}tjjfd	d
}jr| n|  j }njd }|_tt  dd dkr|stt djj djddj d jj i |! }	d _|	S )NTr   c                   s    i | ]\}}| j v r||qS r   )r[   )rB   kvrJ   r   r   rF           z&Autotuner.__call__.<locals>.<dictcomp>c                   s    g | ]}| v rt  | qS r   r   )rB   r'   )_argsr   r   r      r   z&Autotuner.__call__.<locals>.<listcomp>c                 S  s   g | ]
}|d v r
|ndqS )>   r   r      r   )rB   r   r   r   r   r      s    Fc                    s   t   }  fddD }t   }tt  dd dkr7| D ]\}}td| d|d dd	 q$||  _tj	||j
d
j< |_d S )Nc                   s"   i | ]}|j  d |iqS )r~   )r   rB   r~   )rq   rE   r+   r   r   rF      s    z9Autotuner.__call__.<locals>.benchmark.<locals>.<dictcomp>rk   r@   [z] -> r   z.3fmsr:   )timer   r   r   r   r   rx   r   r   r   ra   r   r   )bench_startr   	bench_endr~   time_)rq   r'   rE   pruned_configsr+   r   r   	benchmark   s   

z%Autotuner.__call__.<locals>.benchmarkr   rk   r@   z autotuning for function z finished after z.2fzs; best config selected: ;)"rz   r   r[   r|   r]   rU   r   rX   
isinstancer   appendr   shapestridedtypetupler   prune_configstorchcompilerdisabler\   r   best_configr   r   r   r   rx   rb   r.   r   rm   r{   )
r+   rq   rE   used_cached_resultall_args_argr   r~   retr   )r   rq   r'   rE   r   r+   r   rm      sV   



zAutotuner.__call__rE   r   return	List[Any]c                   s   j }jrjj jfi }jrTj}t|tr+|dkr+ttj | }n	t|ts4t	dt||krTfdd|D  t
   fdddd | }|S )NrP   zPError while pruning configs, top_k must be either 1) a float <= 1.0 or 2) an intc                   s,   i | ]}|j d i j | qS )r   )rQ   r|   r{   r   )rE   r+   r   r   rF     s    
z+Autotuner.prune_configs.<locals>.<dictcomp>c                   s    |  S rL   r   )x)
est_timingr   r   <lambda>#  s    z)Autotuner.prune_configs.<locals>.<lambda>r:   )rU   rS   r|   rQ   r`   r   r}   intr]   	TypeErrorsortedrX   )r+   rE   r   rR   r   )r   rE   r+   r   r     s"   
 zAutotuner.prune_configs)NNNF)r=   r>   )rE   r   r   r   )r.   r/   r0   r$   r   rd   r   r   r   r   r   rm   r   r   r   r   r   r<   7   s    A
+
&;r<   c                   @  s@   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dS )rT   z
    An object that represents a possible kernel configuration for the auto-tuner to try.

    :ivar kwargs: a dictionary of meta-parameters to pass to the kernel as keyword arguments.
    :type kwargs: dict[Str, Any]
    c                 K  s
   || _ d S rL   rD   )r+   rE   r   r   r   r$   /  s   
zAutotuneConfig.__init__c                 C  s   | di | _d S )NrE   )ra   rE   )r+   stater   r   r   __setstate__2  s   zAutotuneConfig.__setstate__c                 C  s   | j S rL   rD   rJ   r   r   r   r{   5  s   zAutotuneConfig.all_kwargsc                 C  s6   g }| j  D ]\}}|| d|  qd|S )Nz: rl   )rE   r   r   r   )r+   resr   r   r   r   r   __str__8  s   
zAutotuneConfig.__str__c                 C  s   t t|    S rL   )hashr   r{   r   rJ   r   r   r   __hash__>  s   zAutotuneConfig.__hash__c                 C  s(   t |    }t |   }||kS rL   )r   r{   r   )r+   other
self_tupleother_tupler   r   r   __eq__A  s   zAutotuneConfig.__eq__N)
r.   r/   r0   __doc__r$   r   r{   r   r   r   r   r   r   r   rT   '  s    rT   Tc                   s6   dt   d d u rg  fdd}|S )Nzw
    Decorator for auto-tuning a function function.

    .. highlight:: python

    If the environment variable :code:`a  _PRINT_AUTOTUNING` is set to
    :code:`"1"`, we will print a message to stdout after autotuning each
    kernel, including the time spent autotuning and the best configuration.

    :param configs: a list of :code:`AutotuneConfig` objects
    :type configs: list[AutotuneConfig]
    :param key: a list of argument names whose change in value will trigger the evaluation of all provided configs.
    :type key: list[str]
    :param prune_configs_by: a dict of functions that are used to prune configs, fields:
        'perf_model': performance model used to predicate running time with different configs, returns running time
        'top_k': number of configs to bench
        'early_config_prune'(optional): a function used to do early prune (eg, num_stages). It takes configs:List[Config] as its input, and returns pruned configs.
    :param restore_value: a list of argument names whose value will be restored after evaluating any configs.
    :type restore_value: list[str]
    :param do_bench: a benchmark function to measure the time of each run.
    :type do_bench: lambda fn, quantiles
    :param cache_results: whether to cache autotune timings to disk.  Defaults to False.
    "type cache_results: bool
    c              	     s   t |  dS )N)rH   r=   rd   r\   )r<   )rb   r\   rU   rd   r'   r=   rH   r   r   	decoratorf  s   zautotune.<locals>.decorator)r   r   )rU   r'   r=   rH   rd   r\   r   r   r   r   autotuneG  s   r   )NNNNT)$
__future__r   r   r   r   rV   r4   r   r   pathlibr   	functoolsr   r   typingr   r   r   r	   r
   r   r   ri   r    r   r   r   r   r   runtimer   r   r;   r<   rT   r   r   r   r   r   <module>   s4    q!