o
    Ti                     @   sf   d dl Z d dlZd dlmZ d dlmZ ejdd Zejdej	fddZ
de jd	e jfd
dZdS )    N)get_acceleratorc                 C   s   | d dt j| d   S )Ng      ?g      ?gZf?)tlmatherf)x r   c/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/ops/transformer/inference/triton/gelu.pygelu_functor   s   r	   
BLOCK_SIZEc           
      C   s^   t jdd}|| }|t d| }||k }t j| | |d}t|}	t j|| |	|d d S )Nr   )axis)mask)r   
program_idarangeloadr	   store)
x_ptr
output_ptr
n_elementsr
   pidblock_startoffsetsr   r   outputr   r   r   gelu_kernel   s   r   activationsreturnc                    sR   |   sJ t | sJ t| }|   fdd}t| | | dd |S )Nc                    s   t  | d fS )Nr
   )tritoncdiv)metar   r   r   <lambda>$   s    zgelu.<locals>.<lambda>i   )r
   )is_contiguousr   on_acceleratortorch
empty_likenumelr   )r   r   gridr   r   r   gelu   s   
r&   )r"   r   triton.languagelanguager   deepspeed.acceleratorr   jitr	   	constexprr   Tensorr&   r   r   r   r   <module>   s   

