o
    i                  	   @   sJ   d dl Z d dlmZ d dlmZ e jdejdejdejddfddZdS )	    N)scale_and_clamp
BLOCK_SIZEHAS_X_SCALEHAS_O_SCALEreturnc
                 C   s   t jddt j}
t jdd}| ||
  }|||
  }|| t d| }||k }t j|| |dt j}t j|| | |dt j}|rUt |}||9 }||9 }t || | }|	rmt |}t||| j	j
}t j|| ||d dS )a  Sigmoid Linear Unit and Multiplication Kernel

    Args:
        o_ptr:       Pointer to the 2D output tensor.
        o_stride:    Output tensor stride.
        o_scale_ptr: The optional, known scale of the output activations.
        x_ptr:       Pointer to the 2D input tensor.
        x_stride:    Input tensor stride.
        x_scale_ptr: The optional, known scale of the input tensor.
        d:           The number of elements along the second dimension.
        BLOCK_SIZE:  Tunable block size to process in each kernel.

    Operating on a 2D grid, computes the following:

    ```
    out[i, j] = sigmoid(x[i, j]) * x[i, j] * x[i, j + d]
    ```

    If scales are provided, the input and output tensors are scaled.
    r   )axis   )maskN)tl
program_idtoint64arangeloadfloat32sigmoidr   dtype
element_tystore)o_ptro_strideo_scale_ptrx_ptrx_stridex_scale_ptrdr   r   r   ij	o_row_ptr	x_row_ptroffsetsr	   abx_scaleresulto_scale r&   Z/home/ubuntu/vllm_env/lib/python3.10/site-packages/flashinfer/triton/kernels/activation.pysilu_and_mul_kernel   s"   "

r(   )	tritontriton.languagelanguager
   flashinfer.triton.kernels.quantr   jit	constexprr(   r&   r&   r&   r'   <module>   s    	
