o
    Ti                     @   sl   d dl Z d dlZd dlmZ 	 ejdejfddZejdejfddZdde j	d	e j	d
e j	fddZ
dS )    N
BLOCK_SIZEc                 C   s   t d}|||  }t d|}|| }t j|||k td dt j}	|	t j|	dd }
t |
}t j	|dd}|| }| ||  }|| }t j
||||k d d S Nr   inf)maskother)axis)r   tl
program_idarangeloadfloattofloat32maxexpsumstore)
output_ptr	input_ptrstriden_colsr   row_idxrow_start_ptrcol_offsets
input_ptrsrowrow_minus_max	numeratordenominatorsoftmax_outputoutput_row_start_ptroutput_ptrs r#   f/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/ops/transformer/inference/triton/softmax.pysoftmax_kernel   s   
"
r%   c                 C   s   t d}|||  }t d|}	||	 }
||	 ||  }t j|
|	|k td dt j}t j||	|k ddt j}|t j|dd }|| }t |}t j	|dd}|| }| ||  }||	 }t j
|||	|k d d S r   r   )r   r   r   mask_ptrmask_strider   r   r   r   r   r   	mask_ptrsr   r   r   r   r   r    r!   r"   r#   r#   r$   masked_softmax_kernel    s   
"
r)   inputr   returnc              
   C   s  |   sJ |dks|t| jd ksJ d|d u rdnd}| d| jd }|j\}}tt|d}d}|dkr=d	}|d
krCd}t| }	|r{|  sPJ |d|jd }|jd dkre|jd nd}
t	|f |	| |
d||
|||d |	S t|f |	| |
d|||d |	S )Nr*      zOnly dim=-1 is supportedFT      i      i      r   )	num_warpsr   )is_contiguouslenshapeviewr   tritonnext_power_of_2torch
empty_liker)   r   r%   )r+   r   dimuse_mask	input_argn_rowsr   r   r3   outputr'   r#   r#   r$   softmax3   sH   "

rA   )Nr*   )r:   r8   triton.languagelanguager	   jit	constexprr%   r)   TensorrA   r#   r#   r#   r$   <module>   s   "