o
    i}                     @   s   d dl mZ d dlmZmZ d dlZd dlZddlmZm	Z	 d dl
mZ eddG d	d
 d
ZeddG dd dZe	ZG dd dejjZdddZdd ZdS )    )	dataclass)
InFlexDataOutFlexDataN   )_swiglu
_swiglu_fn)target_infoT)frozenc                   @   s6   e Zd ZU e Zeed< e Zeed< dZe	ed< dS )FlexCtxout_datainp_dataFsaturate_infN)
__name__
__module____qualname__r   r   __annotations__r   r   r   bool r   r   \/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/third_party/triton_kernels/swiglu.pyr
   	   s   
 r
   c                   @   s$   e Zd ZU eed< e Zeed< dS )PrecisionConfiglimitflex_ctxN)r   r   r   floatr   r
   r   r   r   r   r   r      s   
 r   c                   @   s   e Zd Zedd ZdS )SwiGLUc                 C   s  |j d }| | }| d dksJ |j d d dks J tj||d f|j|jd}|j}d|j d}	}
d}t	
 sCd	d
ini }t|d |
}t	 }|d urwt	
 r[dnd}|||  }tdt||}t|| d| f}nt||	}|| d| krd| f}n
t|| d| f}d }|d ur|jj|j }t| |j||jj|jj|jj|j||jj|||d |j d d|j d d|j|f|	|
|d |
 dk|||j|d| ||j d d |j dd   }|S )Nr      r   )sizedtypedevice          maxnreg@      )BLOCK_MBLOCK_NEVEN_NM_BLOCKSN_BLOCKSflexpoint_saturate_inf	num_warps)shapenumelstridetorchemptyr   r   r   itemsizer   is_hiptritoncdivnum_smsmaxmin	expt_datatoken_offs_rawn_expts_totr   r   reinterpretexpected_scaleactual_scalechecksum_scaler   scaler   r   view)ctxaalphaprecision_configrouting_dataNMoutr   r%   r&   r+   kwargsr)   r5   waves_per_smnum_pidr(   gridn_tokensr   r   r   forward   sh   


"zSwiGLU.forwardN)r   r   r   staticmethodrN   r   r   r   r   r      s    r   c                 C   s   t | |||S N)r   apply)rB   rC   rD   rE   r   r   r   swigluU   s   rR   c                 C   sx   |j }| dd d df }|d ur|j|d}| ddd df }|d ur+|j| |d}|t||  }||d  }|S )N.r   )r6   r   )r7   r6   )r   clampr/   sigmoid)rB   rC   rD   r   a_gelua_linearout_gelurH   r   r   r   swiglu_torchY   s   rX   rP   )dataclassesr   triton_kernels.numericsr   r   r/   r3   swiglu_details._swiglur   r   triton_kernelsr   r
   r   	swiglu_fnautogradFunctionr   rR   rX   r   r   r   r   <module>   s    
<