o
    c۷i                     @   s  U d Z ddlZddlZddlZddlZddlZddlZddlZddlZddl	m
Z
mZ ddlmZ ddlmZ ddlmZ ddlZddlmZ ddlZedddkZeed	< ed
dZedB ed< dZeed< dd ZdefddZedddefddZda dd Z!G dd dZ"dZ#dZ$da%e
dB ed< de
fddZ&d e'defd!d"Z(d#ede)fd$d%Z*d#eddfd&d'Z+d e'fd(d)Z,dS )*u  Persistent .so cache for CuTe DSL compiled kernels.

Compiled kernels are exported as shared libraries (.so) via export_to_c.
On subsequent runs the .so is loaded via dlopen (~1ms) instead of
re-generating IR + re-JIT'ing (~100ms per kernel).

Controls:
  QUACK_CACHE_ENABLED=0       — disable persistent .so cache (default: enabled)
  QUACK_CACHE_DIR=path        — override default cache directory
    N)	CCompilernew_compiler)	lru_cache)getuser)PathQUACK_CACHE_ENABLED1CACHE_ENABLEDQUACK_CACHE_DIR	CACHE_DIRFCOMPILE_ONLYc                  O   s   d S N )argskwargsr   r   G/home/ubuntu/vllm_env/lib/python3.10/site-packages/quack/cache_utils.py_noop_kernel#   s   r   returnc                  C   s:   t d ur	tt } ntt t  d } | jddd | S )Nquack_cacheTparentsexist_ok)r   r   tempfile
gettempdirr   mkdir)	cache_dirr   r   r   get_cache_path'   s
   
r      )maxsizec                  C   s   t t j} t }|dtjj	 dtjj
   |dtj   |dtj   t| dD ]'}| s@q9|||    | }|t|dd || q9| S )zOHash all quack Python sources plus runtime ABI stamps into a short fingerprint.py.zcutlass=ztvm_ffi=z*.py   little)r   __file__resolveparenthashlibsha256updatesysversion_infomajorminorencodecutlass__version__tvm_ffisortedrglobis_filerelative_toas_posix
read_byteslento_bytes	hexdigest)
quack_roothsrccontentr   r   r   _compute_source_fingerprint0   s   "r>   c                  C   s@   t rd S tjjddD ]} t|  rtj| tjd qda d S )NFenable_tvm_ffi)modeT)	_runtime_libs_loadedcuteruntimefind_runtime_librariesr   existsctypesCDLLRTLD_GLOBAL)pathr   r   r   _ensure_runtime_libsJ   s   rK   c                   @   s<   e Zd ZdZddededefddZdd	d
ZdddZ	dS )FileLockz2Advisory file lock using fcntl.flock with timeout.   	lock_path	exclusivetimeoutc                 C   s   || _ || _|| _d| _d S )N)rN   rO   rP   _fd)selfrN   rO   rP   r   r   r   __init__\   s   
zFileLock.__init__r   c                 C   s   | j r	tjtjB ntjtjB }| j rtjntj}tt	| j
|| _t | j }t |k rQzt| j|tjB  | W S  tyJ   td Y nw t |k s/t| j d| _td| j
 )Ng?rQ   zTimed out waiting for lock: )rO   osO_WRONLYO_CREATO_RDONLYfcntlLOCK_EXLOCK_SHopenstrrN   rR   time	monotonicrP   flockLOCK_NBOSErrorsleepcloseRuntimeError)rS   flags	lock_typedeadliner   r   r   	__enter__b   s   zFileLock.__enter__Nc                 G   s4   | j dkrt| j tj t| j  d| _ d S d S )Nr   rQ   )rR   rY   r`   LOCK_UNrU   rd   )rS   excr   r   r   __exit__q   s
   

zFileLock.__exit__)rM   )r   rL   )r   N)
__name__
__module____qualname____doc__r   boolfloatrT   ri   rl   r   r   r   r   rL   Y   s
    
rL   func<   	_compilerc                   C   s   t d u rt a t S r   )ru   r   r   r   r   r   _get_compiler   s   rv   keyc                 C   s   t t|  S r   )r&   r'   pickledumpsr9   )rw   r   r   r   _key_to_hash   s   rz   so_pathc                 C   s"   t   tjjt| dd}|t S )NTr?   )rK   rC   rD   load_moduler]   EXPORT_FUNC_NAME)r{   mr   r   r   _load_from_so   s   r   c                 C   sP   |j jddd |d}| jt|td t t|gt| |  d S )NTr   z.o)object_file_pathfunction_name)	r%   r   with_suffixexport_to_cr]   r}   rv   link_shared_objectunlink)compiled_fnr{   obj_pathr   r   r   _export_to_so   s   
r   c              
   C   sb  t s| }tr	tS |S t t  }|jddd t| }|| d }|| d }z5t|dtd$ |	 rOtrCtW  d   W S t
|W  d   W S W d   n1 sYw   Y  W n	 tyh   Y nw | }z"t|dtd |	 s~t|| W d   n1 sw   Y  W n ty } ztd| d	|  W Y d}~nd}~ww trtS |S )
aT  Check persistent .so cache; on miss, call compile_fn() and export.

    Args:
        key: Hashable tuple identifying this compilation (include source fingerprint).
        compile_fn: Zero-arg callable that returns a compiled CuTe DSL function.

    Returns:
        The compiled function (either loaded from .so or freshly compiled).
    Tr   z.soz.lockF)rO   rP   Nz#quack cache: export failed for key z: )r	   r   r   r   r>   r   rz   rL   LOCK_TIMEOUTrF   r   re   r   	Exceptionprint)rw   
compile_fnr   
cache_pathshar{   rN   er   r   r   compile_and_cache   sF   

 r   )-rp   rG   rY   r&   rU   rx   r)   r   r^   distutils.ccompilerr   r   	functoolsr   getpassr   pathlibr   r.   cutlass.cuterC   r0   getenvr	   rq   __annotations__r   r]   r   r   r   r>   rB   rK   rL   r}   r   ru   rv   tuplerz   objectr   r   r   r   r   r   r   <module>   sD   	#