o
     i$                     @   s.  U d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
mZmZ d dlZG dd de jZG dd de jZG dd	 d	eZG d
d deZdai aeeee f ed< ejddkZejddkZdadddZG dd deZedejfde defddZ!ee
 Z"dS )    N)Enum)AnyDictListc                   @   $   e Zd Zdd Zdd Zdd ZdS )_ForLoopUnrollerc                 C   s   || _ || _|| _d S N)	loop_itertargetinline_variables)selfr
   r   r	    r   Q/home/ubuntu/.local/lib/python3.10/site-packages/xformers/triton/vararg_kernel.py__init__   s   
z_ForLoopUnroller.__init__c                 C   s    |j | jkr|S tt| jS r   )idr
   astNamestrr	   r   noder   r   r   
visit_Name   s   z_ForLoopUnroller.visit_Namec                 C   sT   t |jtjr(|jj| jkr(t |jtjr(|jj| jv r(t|jj | j S |S r   )	
isinstanceslicer   r   r   r
   valuer   r	   r   r   r   r   visit_Subscript   s   z _ForLoopUnroller.visit_SubscriptN)__name__
__module____qualname__r   r   r   r   r   r   r   r      s    r   c                   @   r   )_VisitorVarargKernelc                 C   s   t  | _|| _d S r   )setr   N)r   r    r   r   r   r   +   s   
z_VisitorVarargKernel.__init__c                 C   s   |j d u r(|jdkr(t|jtjr(t|jtjr(|jj dkr(| j	|jj
 g S |j d ur4| |j |_ |jd ur@| |j|_|jd urL| |j|_|S )N   VAR_ARGS_ARRAY)r   simpler   r
   r   r   
annotationConstantr   addr   visitr   r   r   r   visit_AnnAssign/   s    




z$_VisitorVarargKernel.visit_AnnAssignc                    s   g }j D ]/  jd ur/t jtjr/ jjdkr/| j j | fddt	| j
D 7 }q|  qjd ur[| jjj |fddt	| j
D 7 }d _|j7 }g _|_ S )Nr"   c                    s    g | ]}t  j | qS r   )r   arg.0i)r)   r   r   
<listcomp>M   s     z8_VisitorVarargKernel.visit_arguments.<locals>.<listcomp>c                    s"   g | ]}t  jj | qS r   )r   r)   varargr*   )r   r   r   r-   R   s   " )argsr$   r   r   r%   r   r   r&   r)   ranger    appendr.   
kwonlyargs)r   r   new_argsr   )r)   r   r   visit_argumentsC   s$   



z$_VisitorVarargKernel.visit_argumentsN)r   r   r   r   r(   r4   r   r   r   r   r   *   s    r   c                   @   s   e Zd Zdd ZdS )_VisitorUnrollKernelc                    s  t |jtjrB|jjjdksBt|jjdksBt |jjd tjrB|jjd jjdksBt|jjd jdksB|jjd jd j jvrO fdd|j	D |_	|S g }t
 jD ])}t|jj j|d}|j	D ]}t|}t||} |}|| qeqV|S )Nr0   r!   r   lenc                    s   g | ]}  |qS r   )r'   )r+   xr   r   r   r-   e   s    z2_VisitorUnrollKernel.visit_For.<locals>.<listcomp>)r
   r   r	   )r   iterr   Callfuncr   r6   r/   r   bodyr0   r    r   r
   copydeepcopyfix_missing_locationsr'   r1   )r   r   	new_nodesr,   unrollerr<   new_noder   r8   r   	visit_For[   s2   


z_VisitorUnrollKernel.visit_ForN)r   r   r   rC   r   r   r   r   r5   Z   s    r5   c                       s,   e Zd Z fddZdd Zdd Z  ZS )_VisitorConditionalKernelc                    s   t  j|i | d | _d S r   )superr   extra_nodes)r   r/   kwargs	__class__r   r   r   x   s   
z"_VisitorConditionalKernel.__init__c                 C   s   t |jtjr| |j|_|S |jj| jv rlt |jtjrld g| j	 }t|jj | j	d  |d< t
t| j	d D ]*}t|jt gt|g}t|jj | }tj||||d  d||< q=|d S |S )Nr!   )testr<   orelser   )r   r   r   	Subscriptr   r   r   r   r   r    reversedr0   CompareEqr%   IfExp)r   r   if_statementsr,   rK   r<   r   r   r   r   |   s     
z)_VisitorConditionalKernel.visit_Subscriptc                 C   sf   t |jtjr,|jjdkr,t|jdkr,t |jd tjr,|jd j| jv r,t| j	S | 
| |S )Nr6   r!   r   )r   r;   r   r   r   r6   r/   r   r%   r    generic_visitr   r   r   r   
visit_Call   s   
z$_VisitorConditionalKernel.visit_Call)r   r   r   r   r   rT   __classcell__r   r   rH   r   rD   w   s    rD   _FILENAME_TO_SRCXFORMERS_MATERIALIZE_CODEGEN1XFORMERS_KEEP_CODEGENc                 C   s   | t v rt |  S t| |S r   )rV   _getlines_orig)filenamemodule_globalsr   r   r   _monkey_patched_getlines   s   
r]   c                   @   s   e Zd ZdZdZdS )
VarargModeunrollconditionalN)r   r   r   UNROLLCONDITIONALr   r   r   r   r^      s    r^   r    modec                 C   s  t | j}t|j}|tjkrt|d}n
|tj	kr!t
|d}||}t|}tjjtjjfdkr9tdt|}d| jj d|j d| }trtsTt atjtj| d}trndatjtj|dd	 t |d
}	|	!| W d   n1 sw   Y  nt"st#j$a%t&t#_$d| d}|j'ddt"|< t(||d}
i }t)|
| jj*| t+|dksJ t+|t,t-|. }t /|}t0|dr|1| d|_2|S ||_|S )a"  
    Specializes a triton kernel with variable number of inputs
    to a specific number of inputs `N`.

    `mode` can either be `UNROLL` or `CONDITIONAL`. Both options
    implement the same functionality, but have different implementations
    and can have different performance. In `UNROLL` mode, any loops that
    loop over the varargs will be unrolled. In `CONDITIONAL` mode,
    indexing into the list of varargs is replaced with conditional
    statements like `a0 if i==0 else a1 if i==1 else a2...`.
    `CONDITIONAL` mode is generally better if `N` is large, because it
    generates a smaller triton kernel that should fit in the
    instruction cache and will compile faster.

    NOTE: Because it's quite costly to call `triton.jit`,
    we cache the returned value with `lru_cache`
    )r    )      z6Error: This functionality requires python 3.9 or abovezunroll_varargs--z.pyNT)exist_okw<>)keependsexecr!   _unsafe_update_src)3tritonJITFunctionfnr   parsesrcr^   ra   r5   rb   rD   r'   r?   sysversion_infomajorminorRuntimeErrorunparser   r   _should_materialize_codegen_tmp_dirtempfileTemporaryDirectoryospathjoinname _should_keep_materialized_sourcemakedirsdirnameopenwriterV   	linecachegetlinesrZ   r]   
splitlinescompilerl   __globals__r6   nextr9   valuesjithasattrrm   hash)kernelr    rc   kparsednodeVisitornew_srcfn_basenamefn_filenamefcode_localsrp   	jitted_fnr   r   r   unroll_varargs   sP   








r   r   )#r   r=   	functoolsr   r}   rs   r{   enumr   typingr   r   r   rn   NodeTransformerr   r   r5   rD   rZ   rV   r   __annotations__environgetry   r   rz   r]   r^   	lru_cachera   intr   r"   r   r   r   r   <module>   s0   
0*
L