o
    ۗii                     @   s  d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlZd dlZd dlmZ d dlmZ dZedkreed	Zejd
dd ejddedddd ejddeddd ejddeddd ejddeddd ejdd edd!d ejd"d#ed$dd% ejd&d'ed(dd% e ZejrejnejZejrejneeZeejZejd eej ej !ej"eZ#ej $e#Z%e#j&'e% e(e%ejZ)ej*+d)Z*e,e*dksJ e-e.d*d+ ej/+d)Z/d,e
e fd-d.Z0d/ej1 d0ej2 Z3e0e/e3g Z4d1d2 Z5d3d4 e6e/D Z7d5d4 e78 D Z7d6d4 e6e/D Z9d7d4 e98 D Z9d8d4 e6e/D Z/d9:d:d; e9; D Z<d<d; e98 D Z=e=d=ej1 d>ej2 g7 Z=e7; D ]Z>e>d?v skJ d@e> q]ej?j@jABe7ZCeCD 8 D ]\ZEZFe9Ge)jHeE eFi q{ej@jIe)e9e/eCdAZJej1ej2dBZKejLeJeKdCZMg ZHg ZNg ZOg ZPe6e)jHD ]6\ZQZReRe9vreHSeR eNSe/eR  eOSeR ePSe/eR  qeQeCjTv reHSeR eNSe/eR  qee/; eCZUdD:ee4eUgZVee WeMjXdE dFdG ZYi dHeVdIejdJe,eYdKdL:dMd; eZeYdddF eYdddF D d,dL:dNd; eZeOePD dOdL:dPd; eZeHeND dQdL:dRd; eOD dSe,eOdTe=dUeMj[j\dVej1dWdD:e<e3gdXe*d  dYe*d dZe*dF d[dZ]d\D ]@Z^ee_jd]e^  Z`ead^e4 dDeU d^e^ bd_Zcecdee`e jfd`i e] W d   n	1 sw   Y  q~dS dS )a    N)ArgumentParser)Path)List)kernel_suffix	ty_to_cppa  
Triton ahead-of-time compiler:

This program compiles the kernel with name `kernel-name` in the file at the
provided `path` into self-contained C source-code that embeds the `cubin`
data along with utilities to load, unload and launch the kernel.

signature is provided as a list of (optionally divisibility-hinted) types
or constexpr values, e.g.

`compile.py --kernel-name kernel --signature "*fp32:16, i32:16, 1024, i32" --out-name kernel /path/to/kernel.py`

will compile triton.JITFunction of name `kernel` inside the file `/path/to/kernel.py`.
Said kernel will be specialized such that argument 0, 1 are assumed to be multiple of 16,
and argument 2 is assumed to be a compile-time constant of value 1024, i.e. it won't be part of the generated prototype.

The resulting entry point will have signature

CUresult kernel_{specialization_suffix}(CUstream stream, unsigned gX, unsigned gY, unsigned gZ, float* arg0, int32_t arg1, int32_t arg2)

Different such specialized entry points can be combined using the `linker.py` script.

NOTE: when resolving the scope of /path/to/kernel.py, the file will be executed from within its parent directory with the python interpreter
used to run this `compile.py` script
__main__)descriptionpathzTPath to Python source containing desired kernel in its scope. File will be executed.)helpz--kernel-namez-n zName of the kernel to compileT)typedefaultr   requiredz--num-warpsz-w   z$Number of warps to launch the kernel)r   r   r   z--num-stagesz-ns   z/Number of stages (meta-parameter of the kernel)z
--out-namez-onz Out name for the compiled kernelz
--out-pathz-ozOut filenamez--signaturez-szSignature of the kernel)r   r   r   z--gridz-gzLaunch grid of the kernel,c                 C   s
   |  dS )N )strip)s r   R/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/triton/tools/compile.py<lambda>G   s   
 r   	signaturec                 C   s,   t  }|d|   | d d S )Nr      )hashlibsha256updatejoinencode	hexdigest)r   mr   r   r   hash_signatureI   s   r"   warpsxstagesc                 C   sF   zt | }|W S  ty   Y nw zt| }|W S  ty"   Y d S w N)int
ValueErrorfloat)r   retr   r   r   	constexprQ   s   r*   c                 C   s,   i | ]\}}d |v r|t |d d qS ):r   )r*   split.0ir   r   r   r   
<dictcomp>^   s   , r0   c                 C      i | ]\}}|d ur||qS r%   r   r.   kvr   r   r   r0   _       c                 C   s    i | ]\}}t j| t|qS r   )kernel	arg_namesr*   r-   r   r   r   r0   `   s     c                 C   r1   r%   r   r2   r   r   r   r0   a   r5   c                 C   s4   i | ]\}}t j| tvrt j| |d d qS )r+   r   )r6   r7   	constantsr,   r-   r   r   r   r0   b   s
    xc                 C   s   g | ]}t |qS r   )str)r.   r4   r   r   r   
<listcomp>g   s    r;   c                 C   s   g | ]\}}| d | qS )=r   r2   r   r   r   r;   h   r5   z
num_warps=znum_stages=)r      z#Only 1 and 16 are valid hints, got )fnr8   r   attrs)	num_warps
num_stages)options_cubin   kernel_nametriton_kernel_namebin_sizebin_dataz, c                 C   s   g | ]\}}d | | qS )0xr   )r.   r9   yr   r   r   r;      r5   c                 C   "   g | ]\}}t | d | qS r   r   r.   nametyr   r   r   r;         " full_signaturec                 C   rM   rN   r   rO   r   r   r   r;      rR   arg_pointersc                 C   s   g | ]}d | qS )&r   )r.   argr   r   r   r;      s    num_argskernel_docstringsharedr@   	algo_infogridXgridYgridZ_placeholder)hczcompile..wr   )gbinasciir   importlib.util	importlibsysargparser   pathlibr   typingr   tritontriton.backendstriton.compiler.code_generatorr   triton.backends.nvidia.driverr   desc__name__parseradd_argumentr:   r&   
parse_argsargsout_namerG   out_pathr
   arg_pathinsertparentutilspec_from_file_locationstemspecmodule_from_specmodloaderexec_modulegetattrr6   gridr,   lenlistmapr   r"   r@   rA   meta_sigsig_hashr*   	enumeratehintsitemsr8   r   values	const_sig
doc_stringr_   backendscompilerAttrsDescriptor
from_hintsr?   get_constantspr4   r   r7   	ASTSourcesrcoptscompileccinfo	arg_typesarg_names_not_1arg_types_not_1r/   arg_nameappend
equal_to_1suffix	func_namehexlifyasmhex_zipmetadatarY   paramsext__file__template_pathwith_suffixopenfpwrite	read_textformatr   r   r   r   <module>   s    






.	
$ p