o
    Hۂi|                  	   @  sl  d dl mZmZ d dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZ d dlmZ d dlmZ d	d
lm Z  ddl!m!Z! ddlm"Z" d	dl#m$Z$m%Z%m&Z&m'Z' ddl(m)Z) d dl*m+Z+m,Z, dZ-dZ.edZ/G dd dej0Z1dRddZ2G dd dZ3dSddZ4G dd  d ee/ Z5d!d" Z6d#d$ Z7d%d& Z8G d'd( d(Z9eG d)d* d*Z:d+d, Z;d-d. Z<G d/d0 d0e9e5e/ Z=edTd3d4Z>edddddddd5dUd@d4Z>	dVdddddddd5dWdCd4Z>G dDdE dEZ?G dFdG dGZ@dHdI ZAdJdK ZBG dLdM dMe9ZCG dNdO dOe9ZDdPdQ ZEdS )X    )annotationsdivisionN)defaultdict)	dataclass)cached_property)	CallableGenericIterableOptionalTypeVaroverloadDictAnyTuple)BaseBackend)
ModuleType   )knobs   )driver)_async_compile)find_paths_ifget_iterable_pathtype_canonicalisation_dictis_namedtuple)get_cache_key)get_cache_invalidating_env_varsnative_specialize_implztriton.languagez"triton.experimental.gluon.languageTc                      s   e Zd ZdZd! fddZedd Zdd	 Zd
d Zd"ddZ	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Z  ZS )#DependenciesFindera  
    This AST visitor is used to find dependencies of a JITFunction. This can
    be used to invalidate a JITFunction's hash when its source code -- or
    that of its dependencies -- changes.

    This visitor also keeps track of the global variables touched by the
    JITFunction.  When we launch the kernel, we check that these have the same
    values as they did when we ran this visitor.  If not, we raise an error (or
    otherwise we could recompile).
    returnNonec                   sV   t    || _t|d| _|| _|| _h d| _	t
tddh| _i | _d| _d S )Nutf-8>
   intlenmaxminlistfloatprintrangegetattr
isinstancecopymathF)super__init__namehashlibsha256encodehasherglobals	nonlocalssupported_python_builtinsGLUON_MODULETRITON_MODULEsupported_modulesused_global_valsvisiting_arg_default_value)selfr1   r6   r7   src	__class__ U/home/ubuntu/maya3_transcribe/venv/lib/python3.10/site-packages/triton/runtime/jit.pyr0   .   s   


zDependenciesFinder.__init__c                 C  
   | j  S N)r5   	hexdigestr>   rB   rB   rC   retY      
zDependenciesFinder.retc                 C  s&   t |jrdS t|dd}|tS )NT
__module__ )inspect	isbuiltinfuncr+   
startswithr:   )r>   noderN   modulerB   rB   rC   _is_triton_builtin]   s   
z%DependenciesFinder._is_triton_builtinc                 C  s   t |tsJ | j |j @ D ].}|\}}| j| \}}|j| \}}||kr?td| d| d| j d|j d| dq| j|j |j}|t	t
|dd7 }| j|d	 d S )
NGlobal variable z has value z when compiling z, but inner kernel z has conflicting value z7 from when it was first compiled.  This is not allowed.noinlineFr"   )r,   JITCallabler<   keysRuntimeErrorr1   __name__update	cache_keystrr+   r5   r4   )r>   rN   kvar_name_v1v2func_keyrB   rB   rC   _update_hashc   s   &zDependenciesFinder._update_hashNc                 C  s   ddl m} |d u st|tu rd S t|ddr%|jD ]}| | qd S t|ddr-d S t|dddkr7d S t|trC| 	| d S t
|rXt|tsXt||sXtd	| | jr]d S |d urot||f| j|t|f< d S )
Nr   	constexpr__triton_aggregate__F__triton_builtin__rJ   rK   ztriton.language.extra.libdevicez!Unsupported function referenced: )language.corerd   typer   r+   
hash_attrsrecord_referencer,   rU   rb   callablerW   r=   r-   deepcopyr<   id)r>   valvar_dictr1   rd   attrrB   rB   rC   rj   u   s*   


z#DependenciesFinder.record_referencec                   sd   t |jtju r|jS |j jv rd S  fdd}||j\}}|j jv r(|S  |||j |S )Nc                   sD    j | d }|d ur| j fS  j| d }|d ur | jfS dS )NNN)r6   getr7   )r1   rn   rG   rB   rC   name_lookup   s   

z2DependenciesFinder.visit_Name.<locals>.name_lookup)rh   ctxastStorerm   local_namesr8   rj   )r>   rP   rs   rn   ro   rB   rG   rC   
visit_Name   s   	zDependenciesFinder.visit_Namec                   s    fdd|j D S )Nc                   s   g | ]}  |qS rB   )visit).0eltrG   rB   rC   
<listcomp>       z2DependenciesFinder.visit_Tuple.<locals>.<listcomp>)eltsr>   rP   rB   rG   rC   visit_Tuple   s   zDependenciesFinder.visit_Tuplec                 C  sl   |  |j}t|tjr|  |j}t|tjst|dd}|d u s'|| jv r)d S t||j}| | |S )NrX   rK   )	ry   valuer,   ru   	Attributer+   r;   rp   rj   )r>   rP   lhslhs_namerH   rB   rB   rC   visit_Attribute   s   
z"DependenciesFinder.visit_Attributec                 C  s"   dd |j j D | _| | d S )Nc                 S  s   h | ]}|j qS rB   argrz   r   rB   rB   rC   	<setcomp>       z7DependenciesFinder.visit_FunctionDef.<locals>.<setcomp>)argsrw   generic_visitr   rB   rB   rC   visit_FunctionDef   s   z$DependenciesFinder.visit_FunctionDefc                   sn    fdd}t |j|j|jr|jgng |jD ]} | q||j |jd ur0 |j ||j	 d S )Nc                   sB   z j rJ d _ | D ]}|d ur | qW d _ d S d _ w )NTF)r=   ry   )defaultsexprrG   rB   rC   visit_defaults   s   

z:DependenciesFinder.visit_arguments.<locals>.visit_defaults)
	itertoolschainposonlyargsr   vararg
kwonlyargsry   kw_defaultskwargr   )r>   rP   r   r   rB   rG   rC   visit_arguments   s   (


z"DependenciesFinder.visit_argumentsc                 C  s:   |  |}t|tr|  jt|O  _d S | j| d S rE   )ry   r,   r'   rw   setadd)r>   rP   targetrB   rB   rC   visitAssnTarget   s   

z"DependenciesFinder.visitAssnTargetc                 C  s4   t |jdkrtd| |jd  | | d S )Nr   z2Simultaneous multiple assignment is not supported.r   )r$   targets	TypeErrorr   r   r   rB   rB   rC   visit_Assign   s   zDependenciesFinder.visit_Assignc                 C     |  |j | | d S rE   r   r   r   r   rB   rB   rC   visit_AnnAssign      z"DependenciesFinder.visit_AnnAssignc                 C  r   rE   r   r   rB   rB   rC   	visit_For  r   zDependenciesFinder.visit_For)r    r!   rq   )rX   rJ   __qualname____doc__r0   propertyrH   rR   rb   rj   rx   r   r   r   r   r   r   r   r   __classcell__rB   rB   r@   rC   r   "   s"    +

' 	r   r    r[   c                 C  s  dd l m  m} t| trZ|  } | dr/| d} t| } | ds'J d| dd   S | 	dr>dt| d d  S | drMdt| dd   S | drYt| dS n%t| |j
rhdt| j S t| |jrr| j} nt| tr{| j} nt| } t| d	d
| S )Nr   zconst const**kr   ztl._trK   )triton.language.corelanguagecorer,   r[   striprO   removeprefix_normalize_tyendswithpointer_type
element_tydtyper1   rh   rX   r   rr   replace)tyr   rB   rB   rC   r     s.   






r   c                   @  sr   e Zd ZdZdd	d
Zedd ZedddZedddZedd Z	edd Z
edd Zedd ZdS )KernelParamzBRepresents a parameter (name plus metadata) to a @jit'ed function.numr#   paraminspect.Parameterdo_not_specializebooldo_not_specialize_on_alignmentc                 C  s   || _ || _|| _|| _d S rE   )r   _paramr   r   )r>   r   r   r   r   rB   rB   rC   r0   .  s   
zKernelParam.__init__c                 C     | j jS rE   )r   r1   rG   rB   rB   rC   r1   5     zKernelParam.namer    r[   c                 C  s(   | j jr| j jtjjkrdS t| j jS )NrK   )r   
annotationrL   	Parameteremptyr   rG   rB   rB   rC   r   9  s   zKernelParam.annotationc                 C  sN   | j }|dr|dd  }n|dr|dd  }|tt v r%| j S dS )Nr   r   r   r   rK   )r   rO   r   r   values)r>   arB   rB   rC   annotation_type?  s   

zKernelParam.annotation_typec                 C  s
   d| j v S Nrd   )r   rG   rB   rB   rC   is_constexprJ  rI   zKernelParam.is_constexprc                 C  s    | j rdS d| jv p| jdS )NFr   r   )r   r   rO   rG   rB   rB   rC   is_constN  s   zKernelParam.is_constc                 C  r   rE   )r   defaultrG   rB   rB   rC   r   T  r   zKernelParam.defaultc                 C  s   | j jtjjkS rE   )r   r   rL   r   r   rG   rB   rB   rC   has_defaultX  s   zKernelParam.has_defaultN)r   r#   r   r   r   r   r   r   r    r[   )rX   rJ   r   r   r0   r   r1   r   r   r   r   r   r   r   rB   rB   rB   rC   r   +  s"    





r   Fc                 C  s   d}d}t t| |||d S )NFTr   )r   r   )r   
specializer   alignrB   rB   rC   mangle_type]  s   r   c                   @  s0   e Zd ZU ded< dd Zdd Zddd	Zd
S )KernelInterfacer   runc                O  s   | j ttj||dd|S )NTgridwarmup)r   map
MockTensor
wrap_dtype)r>   r   r   kwargsrB   rB   rC   r   f  s   zKernelInterface.warmupc                O     t d)Nzrun not implemented)NotImplementedError)r>   r   r   r   r   rB   rB   rC   r   i     zKernelInterface.runr    c                   s    fddS )z
        A JIT function is launched with: fn[grid](*args, **kwargs).
        Hence JITFunction.__getitem__ returns a callable proxy that
        memorizes the grid.
        c                    s   j |  dd|S )NFr   )r   )r   r   r   r>   rB   rC   <lambda>r  r}   z-KernelInterface.__getitem__.<locals>.<lambda>rB   )r>   r   rB   r   rC   __getitem__l  s   zKernelInterface.__getitem__N)r    r   )rX   rJ   r   __annotations__r   r   r   rB   rB   rB   rC   r   c  s
   
 r   c           	   	   C  sl   dd |  D }dd l}| |dd | D t| dd | D t| |j|d}||}|S )Nc                 S  s@   i | ]\}}||j jd krt|n|j jdkrd|jin|qS )r   rd   )rA   rX   r[   r   rz   keyr   rB   rB   rC   
<dictcomp>w  s
    z1serialize_specialization_data.<locals>.<dictcomp>r   c                 S     g | ]}t |qS rB   r'   rz   xrB   rB   rC   r|         z1serialize_specialization_data.<locals>.<listcomp>c                 S  r   rB   r   r   rB   rB   rC   r|     r   )r1   	signatureconstant_keysconstant_vals
attrs_keys
attrs_valsoptionsr   )itemsjsonrV   r'   r   __dict__dumps)	r1   r   	constantsattrsr   r   r   objserialized_objrB   rB   rC   serialize_specialization_datav  s   $
r   c              
   C  s  t | jt |ksJ g }t| j |D ]o\}}|jr&|d| d q|jr+dnd}|jr2dnd}|jr9dnd}d| d| d| d| d	}	|j	r~t
|j	trc|j	dksa|j	dd	 d
v rcd}|rs|d|j	 d|	 d q|d|j	 d q||	  qdd }
ddtt|
| j dg  dddd | j D  dd| d}dd | j D }t}||d< ||d< t|d< t|| |d S ) a2  
    Equivalent to sig.bind followed by apply_defaults. This generates a
    native Python function (using exec) which can be memoized on a per-kernel
    basis to avoid having to run these expensive functions -- which constitute
    much of the kernel launch overhead -- every time we run the kernel.
    z("constexpr", )TrueFalsezspecialize_impl(backend, , u1Nr   )fpbfFz("z",) + z[1:]z", None)c                 S  s0   | d j tjju r| d S | d  d| d  S )Nr   r   z	=default_r   rL   r   r   )r   rB   rB   rC   r     s   0 z0create_function_from_signature.<locals>.<lambda>z
def dynamic_func(z	**optionsz):
    params = {c                 S  s   g | ]
}d | d| qS )'z': rB   )rz   r1   rB   rB   rC   r|     s    z2create_function_from_signature.<locals>.<listcomp>z}
    specialization = [,z-]
    return params, specialization, options
c                 S  s,   i | ]\}}|j tjjurd | |j qS )default_r  )rz   r1   r   rB   rB   rC   r     s
    z2create_function_from_signature.<locals>.<dictcomp>specialize_implbackendrU   dynamic_func)r$   
parametersziprV   r   appendr   r   r   r   r,   r[   joinr'   r   r   r   rU   exec)sigkparamsr  specializationr1   kpr   r   r   rH   r   	func_bodyfunc_namespacer  rB   rB   rC   create_function_from_signature  sD   
r  c                 C  s   | j  d| j S )N.)rJ   r   fnrB   rB   rC   get_full_name     r  c                   @  sj   e Zd Zdd Zdd ZedddZd	d
 Zdd Zedd Z	dd Z
dd Zdd ZeeedZdS )rU   c              
   C  s   || _ t|| _zt|\| _| _W n ty% } ztd|d }~ww t|| _	t
 | _td| j}|td|tj d  }|| _d | _i | _|j| _|j| _|j| _|j| _|j| _d S )Nz1@jit functions should be defined in a Python filerK   z^def\s+\w+\s*\()r  rL   r   getsourcelinesraw_srcstarting_line_numberOSError
ValueErrorr  _fn_name	threadingRLock
_hash_locktextwrapdedentr  research	MULTILINEstart_srchashr<   r   rX   r   __globals__rJ   )r>   r  er?   rB   rB   rC   r0     s(   


zJITCallable.__init__c                 C  s   | j t| jjB S rE   )r*  rL   getclosurevarsr  r7   rG   rB   rB   rC   get_capture_scope     zJITCallable.get_capture_scoper    r[   c                   s   | j q | jd ur| jW  d    S d| j | _t| jj}t| j| j|| j	d}|
|   |jt| j | _tt|j | _ddlm  |  jt fdd| j D 7  _t| jd | _W d    | jS 1 sww   Y  | jS )Nz
recursion:)r1   r6   r7   r?   r   rc   c                   s*   g | ]\\}}\}}t | r||fqS rB   )r,   )rz   r1   r^   rn   rc   rB   rC   r|     s    z)JITCallable.cache_key.<locals>.<listcomp>r"   )r!  r)  r  rL   r,  r  r7   r   r*  r?   ry   parserH   r[   r  dictsortedr<   r   r   rd   r2   r3   r4   rF   )r>   r7   dependencies_finderrB   rc   rC   rZ     s*   

zJITCallable.cache_keyc                 C  s
   t | jS rE   )r)  rZ   rG   rB   rB   rC   __hash__     
zJITCallable.__hash__c                 C  sH   t | j}t|t jsJ t|jdksJ t|jd t js"J |S )Nr   r   )ru   r/  r(  r,   Moduler$   bodyFunctionDef)r>   treerB   rB   rC   r/    s
   zJITCallable.parsec                 C  s   ddl m} || S )Nr   )constexpr_type)r   r9  )r>   r9  rB   rB   rC   rh     s   zJITCallable.typec                 C  s   d| _ || _dS )a"  
        The only method allowed to modify src.
        Bypasses the __setattr__ restriction by calling super().__setattr__ directly.

        Note that it is the callers responsibility to make sure any triton functions that call this function have the `.hash` value reset to None.
        N)r)  r(  )r>   new_srcrB   rB   rC   _unsafe_update_src  s   
zJITCallable._unsafe_update_srcc                 C  r   )NzqCannot set attribute 'src' directly. Use '_unsafe_update_src()' and manually clear `.hash` of all callersinstead.)AttributeErrorrG   rB   rB   rC   _set_src!  r   zJITCallable._set_srcc                 C  s   | j S rE   )r(  rG   rB   rB   rC   _get_src&  s   zJITCallable._get_src)fgetfsetNr   )rX   rJ   r   r0   r-  r   rZ   r3  r/  rh   r;  r=  r>  r?   rB   rB   rB   rC   rU     s    "

rU   c                   @  s&   e Zd ZU ded< ded< ded< dS )JitFunctionInfor   rQ   r[   r1   JITFunctionjit_functionN)rX   rJ   r   r   rB   rB   rB   rC   rA  ,  s   
 rA  c                   sT   t |t|f}| |d }|d ur|S  fdd t |t| }|| |< |S )Nc                   st   t | tr fdd| D S t| r  fdd| D }| j| S t | tr0t fdd| D S t | tr8| jS | S )Nc                      g | ]} |qS rB   rB   r   replace_callablesrB   rC   r|   <  r   z@compute_cache_key.<locals>.replace_callables.<locals>.<listcomp>c                   rD  rB   rB   r   rE  rB   rC   r|   >  r   c                 3  s    | ]} |V  qd S rE   rB   r   rE  rB   rC   	<genexpr>A  s    z?compute_cache_key.<locals>.replace_callables.<locals>.<genexpr>)r,   r'   r   rA   tuplerU   rZ   )r   resultsrE  rB   rC   rF  :  s   



z,compute_cache_key.<locals>.replace_callables)rH  r[   rr   )kernel_key_cacher  r   r   rZ   rB   rE  rC   compute_cache_key3  s   rK  c                 C  s4   t | ts| S t| D ]
\}}t|| |< qt| S rE   )r,   r'   	enumerateconvert_to_tuple_if_listrH  )iteminested_valuerB   rB   rC   rM  K  s
   
rM  c                      s|   e Zd Zdd ZdddZdd Zd	d
 Zdd Zdd Zdd Z			d fdd	Z
dd Zdd Zdd Zdd Z  ZS )rB  c                 C     dS )NFrB   rG   rB   rB   rC   is_gluonY  s   zJITFunction.is_gluonr    bool | Nonec	                 C  s   |sd S | j j}	| j j}
ddd t| j|d D }|	 d|j d|j d|j d|j	 d	|j
 d
| d}t| j }t||||d ||}||||j|j|j|j	|j
|j|||d}|||t|
|	| d|i||ddS )Nr   c                 S  s    g | ]\}}|j  d | qS )z: r1   )rz   r   r   rB   rB   rC   r|   l       z*JITFunction._call_hook.<locals>.<listcomp>r   z[num_warps=z, num_ctas=z, num_stages=z, enable_fp_fusion=z, launch_cooperative_grid=](r   r   )r   devicer   	num_warpsnum_ctas
num_stagesenable_fp_fusionlaunch_cooperative_gridextern_libsconfigsspecialization_data	is_warmupr   F)r   reprr  compileis_manual_warmupalready_compiled)r  r   rJ   r  r	  paramsrX  rY  rZ  r[  r\  r  r   r]  rA  )r>   hookr   r   rW  r   r   r^  r`  r1   rQ   	arg_reprsra  	full_namer_  r   rB   rB   rC   
_call_hook\  s:    8


zJITFunction._call_hookc                 C  s   t |sJ | j| dS )z
        Add a hook that will be executed prior to the execution of run
        function with args and kwargs passed into the kernel
        N)rk   pre_run_hooksr
  )r>   rf  rB   rB   rC   add_pre_run_hook  s   zJITFunction.add_pre_run_hookc                 C  sZ   ddl m}m}m}m} tj }||}|| _|| _|| _t| j	| j
|}i i |||fS )z1
        Precompute as much as possible.
        r   )CompiledKernelrb  	ASTSourcemake_backend)compilerrl  rb  rm  rn  r   activeget_current_targetr  r   re  )r>   rl  rb  rm  rn  r   r  binderrB   rB   rC   create_binder  s   
zJITFunction.create_binderc                   s    |}dd | jD }dd |D }dd t||D }d|vs&J dd|vs.J d	d
|vs6J d|D ]}	|	|jvrI|	|vrItd|	 q8t|dd }
fdd|
D }
dd |D  t dd } fdd|D }|||
|fS )Nc                 S     g | ]}|j qS rB   rT  r   rB   rB   rC   r|     r   z*JITFunction._pack_args.<locals>.<listcomp>c                 S     g | ]}|d  qS )r   rB   r   rB   rB   rC   r|     r   c                 S  s   i | ]\}}||qS rB   rB   rz   r\   vrB   rB   rC   r     r}   z*JITFunction._pack_args.<locals>.<dictcomp>device_typez=device_type option is deprecated; current target will be usedrW  z8device option is deprecated; current device will be usedstreamz8stream option is deprecated; current stream will be usedz2Keyword argument %s was specified but unrecognisedc                 S  s   |dkS r   rB   )r^   rn   rB   rB   rC   r     s    z(JITFunction._pack_args.<locals>.<lambda>c                   s    i | ]}|t t  |qS rB   )r   r'   r   )rz   path)
bound_argsrB   rC   r     rU  c                 S  ru  )r   rB   r   rB   rB   rC   r|     r   c                 S  s
   t |tS rE   )r,   r[   )r^   r   rB   rB   rC   r     s   
 c                   s   i | ]}| t |qS rB   )
parse_attrr   )rz   r\   )attrvalsr  rB   rC   r     s    )parse_optionsre  r	  r   KeyErrorr   )r>   r  r   r{  r  r   sigkeyssigvalsr   r\   
constexprsr   rB   )r}  r  r{  rC   
_pack_args  s"   
zJITFunction._pack_argsc              
   O  s  | d| jp
tjj|d< tjj|d< tj }tj	|}| j
D ]	}||i | q!| j| \}}	}
}}||i |\}}}t|	||}| |d }|d u rm| |||||\}}}}| |||||||}|d u rmd S t }| j D ]\\}}\}}| || }|krtd| d| d| qu|s|d usJ t|r||}t|}|d }|dkr|d nd}|dkr|d nd}t|d	r| }|j||g| R  }|j|||||j|j|tjjtjjg	| R   |S )
Ndebuginstrumentation_moderS   z1 has changed since we compiled this kernel, from z to r   r   r   result)rr   r  r   runtimecompilationr  r   rp  get_current_deviceget_current_streamrj  device_cachesrK  r  _do_compileobjectr<   r   rW   rk   r$   hasattrr  launch_metadatar   r   functionpacked_metadatalaunch_enter_hooklaunch_exit_hook) r>   r   r   r   r   rW  ry  rf  kernel_cacherJ  r   r  rr  r{  r  r   r   kernelr   r  r   not_presentr1   r^   rn   globals_dictnewVal	grid_sizegrid_0grid_1grid_2r  rB   rB   rC   r     sR   


zJITFunction.runc                 C  s   | j d u r| jS |  |S rE   )_reprr  )r>   r^   rB   rB   rC   ra       zJITFunction.reprNc	                   s   |r|ng }|r
|ng }t  | |j| _|| _|| _|| _|| _|| _g | _	t
| jj D ]!\}	}
|	|v p<|
j|v }|	|v pE|
j|v }| j	t|	|
|| q0t| j| _d | _|| _|| _dd | j	D | _dd | j	D | _g | _d S )Nc                 S  rt  rB   rT  rz   prB   rB   rC   r|     r   z(JITFunction.__init__.<locals>.<listcomp>c                 S  s   g | ]}|j r|jqS rB   )r   r   r  rB   rB   rC   r|     s    )r/   r0   rJ   rQ   versionr   r   r  r  re  rL  r   r  r   r1   r
  r   r   rs  r  r  r  rT   	arg_namesr  rj  )r>   r  r  r   r   r  rT   ra  r  rO  r   dnsdns_oar@   rB   rC   r0     s*   
zJITFunction.__init__c              	     s  dd l }dd lm  tj }||}|d | jkr(td|d  d| j t	t
|d }|d } fddt||D }t	t
|d	 }|d
 }	tt||	}
dd |d  D }dd |d  D }|d }| j| \}}}}}||}| j||||||
ddS )Nr   r1   zSpecialization data is for z but trying to preload for r   r   c                   sL   i | ]"\}}| j |r  |nt|tr"d |v r" |d  n|qS rc   )r   is_dtyper,   r0  rd   r   tlrB   rC   r     s    "z'JITFunction.preload.<locals>.<dictcomp>r   r   c                 S  s   i | ]	\}}|t |qS rB   )rM  r   rB   rB   rC   r   (      r   c                 S  s(   i | ]\}}|t |trt|n|qS rB   )r,   r'   rH  r   rB   rB   rC   r   )  s    r   r   T)r   )r   triton.languager   r   rp  r  loadsr  rW   r   rH  r	  r0  r   r  r~  r  )r>   r_  r   rW  deserialized_objr   r   r  r   r   r   r   r   r   r^   r  rB   r  rC   preload  s@   




zJITFunction.preloadc              
     s   j  \}
}	}tjj grd S  	tj }
|
d urWt	 t
	|	}	
fdd} f	dd}|
|||}|S j	
jd}|< tjj g |S )Nc                     s   j j dS )N)r   r   	_env_vars)rb  r   rB   )env_varsr   r>   r?   r   rB   rC   async_compileG  r.  z.JITFunction._do_compile.<locals>.async_compilec              
     s*   | <  tjj g d S rE   )ri  r   r  jit_post_compile_hook)r  )	r   r  rW  r  r   r   r>   r   r   rB   rC   finalize_compileJ  s   z1JITFunction._do_compile.<locals>.finalize_compile)r   r   )r  ri  r   r  jit_cache_hookrm  r   active_moderr   r   r   submitrb  r   r  )r>   r   r   rW  r  r   r   r   r^   r  
async_moderZ   r  r  r  rB   )r   r  rW  r  r  r   r   r>   r   r?   r   r   rC   r  :  s$   
zJITFunction._do_compilec                 O  r   )Nz:Cannot call @triton.jit'd outside of the scope of a kernel)rW   r>   r   r   rB   rB   rC   __call__W  r   zJITFunction.__call__c                 C  s   d| j  d| jj dS )NzJITFunction(:r   )rQ   r  r   rG   rB   rB   rC   __repr__Z  s   zJITFunction.__repr__)r    rS  )NNNNNNN)rX   rJ   r   rR  ri  rk  rs  r  r   ra  r0   r  r  r  r  r   rB   rB   r@   rC   rB  W  s    
.5$'rB  r  JITFunction[T]c                 C     d S rE   rB   r  rB   rB   rC   jitc     r  r  ra  r  r   r   r  rT   ra  Optional[Callable]r  r   Optional[Iterable[int | str]]r   r  Optional[bool]rT   Callable[[T], JITFunction[T]]c                 C  r  rE   rB   r  rB   rB   rC   r  h  s   Optional[T]KernelInterface[T]c          	        s.   d fdd}| dur|| S |S )	a<  
    Decorator for JIT-compiling a function using the Triton compiler.

    :note: When a jit'd function is called, arguments are
        implicitly converted to pointers if they have a :code:`.data_ptr()` method
        and a `.dtype` attribute.

    :note: This function will be compiled and run on the GPU. It will only have access to:

           * python primitives,
           * builtins within the triton package,
           * arguments to this function,
           * other jit'd functions

    :param fn: the function to be jit-compiled
    :type fn: Callable
    r  r   r    r  c              
     sP   t | sJ tjjrddlm} ||  dS t|  dS )Nr   )InterpretedFunction)r  r   r   r  rT   ra  r  )rk   r   r  	interpretinterpreterr  rB  )r  r  r  r   r   r  rT   ra  r  rB   rC   	decorator  s"   zjit.<locals>.decoratorNr  r   r    r  rB   )	r  r  ra  r  r   r   r  rT   r  rB   r  rC   r  v  s   c                   @  sF   e Zd ZdZedd ZdddZdd Zed	d
 Zedd Z	dS )r   zr
    Can be used in place of real tensors when calling:
        kernel.warmup(MockTensor(torch.float32), ...)
    c                 C  s"   | j jdkr| jdkrt| S | S )Nr   torch)rA   rX   rJ   r   r   rB   rB   rC   r     s   zMockTensor.wrap_dtypeNc                 C  s   |d u rdg}|| _ || _d S )Nr   )r   shape)r>   r   r  rB   rB   rC   r0     s   
zMockTensor.__init__c                 C  s8   dg}| j dd  D ]}||d |  q
tt|S )Nr   r   )r  r
  rH  reversed)r>   stridessizerB   rB   rC   stride  s   zMockTensor.stridec                   C  rQ  Nr   rB   rB   rB   rB   rC   data_ptr  r  zMockTensor.data_ptrc                   C  rQ  r  rB   rB   rB   rB   rC   	ptr_range  r  zMockTensor.ptr_rangerE   )
rX   rJ   r   r   staticmethodr   r0   r  r  r  rB   rB   rB   rC   r     s    


r   c                   @  s^   e Zd Zdd Zdd Zdd Zdd	d
Zdd Zdd Zdd Z	dd Z
dd Zdd ZdS )TensorWrapperc                 C  s*   || _ || _|j| _|j| _| jj| _d S rE   )r   basedatarW  r  )r>   r  r   rB   rB   rC   r0     s
   zTensorWrapper.__init__c                 C  rD   rE   )r  r  rG   rB   rB   rC   r    r4  zTensorWrapper.data_ptrc                 G  s   | j j| S rE   )r  r  )r>   r   rB   rB   rC   r    s   zTensorWrapper.strider    r[   c                 C  s   d| j  d| j dS )NzTensorWrapper[rV  r   )r   r  rG   rB   rB   rC   __str__  s   zTensorWrapper.__str__c                 C  rD   rE   )r  element_sizerG   rB   rB   rC   r    r4  zTensorWrapper.element_sizec                 C     t | j | jS rE   )r  r  cpur   rG   rB   rB   rC   r    r  zTensorWrapper.cpuc                 C  s   | j |j  d S rE   )r  copy_)r>   otherrB   rB   rC   r    r  zTensorWrapper.copy_c                 C  r  rE   )r  r  cloner   rG   rB   rB   rC   r    r  zTensorWrapper.clonec                 C     t | j|| jS rE   )r  r  tor   )r>   rW  rB   rB   rC   r    r.  zTensorWrapper.toc                 C  r  rE   )r  r  	new_emptyr   )r>   sizesrB   rB   rC   r    r.  zTensorWrapper.new_emptyNr   )rX   rJ   r   r0   r  r  r  r  r  r  r  r  r  rB   rB   rB   rC   r    s    
r  c                 C  sP   t | tr|| jjkr| jS t| j|S t| drt| |S tdt|  d)Nr  zCannot reinterpret a r  )r,   r  r  r   r  r   rh   )tensorr   rB   rB   rC   reinterpret  s   


r  c                 C  sj   | }t |ts|j}t |tr|jjj}|j}t|jD ]\}}| 	dr0||7 } ||fS q||fS )Nzdef )
r,   rU   r  __code__co_filenamer  rL  r  r   rO   )r  base_fn	file_name
begin_lineidxlinerB   rB   rC   get_jit_fn_file_line  s   


r  c                   @  s(   e Zd Zdd Zedd Zdd ZdS )BoundConstexprFunctionc                 C  s   || _ || _d S rE   )__self____func__)r>   instancer  rB   rB   rC   r0     s   
zBoundConstexprFunction.__init__c                 C  r   rE   )r  rZ   rG   rB   rB   rC   rZ     r   z BoundConstexprFunction.cache_keyc                 O  s   | j | jg|R i |S rE   )r  r  r  rB   rB   rC   r  #  r  zBoundConstexprFunction.__call__N)rX   rJ   r   r0   r   rZ   r  rB   rB   rB   rC   r    s
    
r  c                      s2   e Zd Z fddZdd ZddddZ  ZS )	ConstexprFunctionc                   s   t  | d S rE   )r/   r0   )r>   r  r@   rB   rC   r0   )  s   zConstexprFunction.__init__c                 C  s   |d ur	t || S | S rE   )r  )r>   r   objclassrB   rB   rC   __get__,  s   
zConstexprFunction.__get__N)	_semanticc                  sh   ddl m m}  fdd|D } fdd| D }| j|i |}|d u r*|S tjjr0|S ||S )Nr   )_unwrap_if_constexprrd   c                   rD  rB   rB   r   r  rB   rC   r|   5  r   z.ConstexprFunction.__call__.<locals>.<listcomp>c                   s   i | ]	\}}| |qS rB   rB   rv  r  rB   rC   r   6  r  z.ConstexprFunction.__call__.<locals>.<dictcomp>)r   r  rd   r   r  r   r  r  )r>   r  r   r   rd   resrB   r  rC   r  2  s   zConstexprFunction.__call__)rX   rJ   r   r0   r  r  r   rB   rB   r@   rC   r  '  s    r  c                 C  s   t | S )z
    Wraps an arbitrary Python function so that it can be called at
    compile-time on constexpr arguments in a Triton function and
    returns a constexpr result.
    )r  r  rB   rB   rC   constexpr_functionE  s   r  r   )Fr  )ra  r  r  r  r   r  r   r  r  r  rT   r  r    r  rE   )r  r  ra  r  r  r  r   r  r   r  r  r  rT   r  r    r  )F
__future__r   r   ru   r-   r2   rL   r   r  r$  r"  collectionsr   dataclassesr   	functoolsr   typingr   r   r	   r
   r   r   r   r   r   triton.backendsr   typesr   rK   r   r   r   _utilsr   r   r   r   cacher   triton._C.libtritonr   r   r:   r9   r   NodeVisitorr   r   r   r   r   r   r  r  rU   rA  rK  rM  rB  r  r   r  r  r  r  r  r  rB   rB   rB   rC   <module>   s    , 
p
2<e  <!%