o
    پi                     @   s  d dl mZ d dlZd dlmZ d dlZd dlmZ d dl	m
Z
 g dZejdejfddZejdejd	ejd
ejdejdejf
ddZ	ddejdedeej fddZG dd dejjZ	ddejdedeej dejfddZeZG dd dejZdS )    )OptionalN)input_guard)          @      BDc                 C   s   t d}| || 7 } ||| 7 }t d|}||k }t j| | |ddt j}t j|| dd}	dt |	|  }
||
 }t j|| ||d d S )Nr   g        )maskotheraxis   )r
   )	tl
program_idarangeloadtofloat32sumsqrtstore)xyDr	   epsi_tcolsr
   b_xb_varb_rstdb_y r"   Z/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/layers/attention/fla/l2norm.pyl2norm_fwd_kernel1   s   
r$   NBTr   BTc                 C   s   t d}t | ||f|df|| df||fd}	t j|	ddt j}
t j|
|
 dd}|
t || d d d f  }t |||f|df|| df||fd}t j|||j	j
dd d S )Nr   r   )r   r   )r   r   )boundary_checkr   )r   r   make_block_ptrr   r   r   r   r   r   dtype
element_ty)r   r   r   r%   r&   r   r'   r	   r   p_xr   r   r!   p_yr"   r"   r#   l2norm_fwd_kernel6   s   
((r.   ư>r   r   output_dtypec           
         s   | j }| d| j d } |d u rt| }ntj| |d}|ddks&J | j d | j d  }d|   }t|t|}||krGt	d|dkrht
 d} fd	d
}	t|	 | ||| ||dddd
 nt f | ||||ddd ||S )N)r*   r   r   i   z/This layer doesn't support feature dim >= 64KB.i   i   c                    s   t  | d fS )Nr'   )tritoncdiv)metar&   r"   r#   grid`   s   zl2norm_fwd.<locals>.gridr   r      )r%   r&   r   r	   r'   	num_warps
num_stages)r   r   r	   r8   r9   )shapeviewtorch
empty_likestrideelement_sizeminr2   next_power_of_2RuntimeErrorr3   r.   r$   )
r   r   r0   
x_shape_ogr   r   MAX_FUSED_SIZEr	   r%   r6   r"   r5   r#   
l2norm_fwdJ   sH   

rE   c                   @   s   e Zd ZeedddZdS )L2NormFunctionr/   Nc                 C   s   t |||S N)rE   )ctxr   r   r0   r"   r"   r#   forward   s   zL2NormFunction.forwardr/   N)__name__
__module____qualname__staticmethodr   rI   r"   r"   r"   r#   rF   }   s    rF   returnc                 C   s   t | ||S rG   )rF   apply)r   r   r0   r"   r"   r#   l2norm   s   rQ   c                       sD   e Zd Zddedeej f fddZdejdejfd	d
Z	  Z
S )L2Normr/   Nr   r0   c                    s   t    || _|| _d S rG   )super__init__r   r0   )selfr   r0   	__class__r"   r#   rT      s   

zL2Norm.__init__r   rO   c                 C   s   t || j| jS rG   )rQ   r   r0   )rU   r   r"   r"   r#   rI      s   zL2Norm.forwardrJ   )rK   rL   rM   floatr   r<   r*   rT   TensorrI   __classcell__r"   r"   rV   r#   rR      s    rR   rJ   )typingr   r<   torch.nnnnr2   triton.languagelanguager   %sglang.srt.layers.attention.fla.utilsr   BT_LISTjit	constexprr$   r.   rY   rX   r*   rE   autogradFunctionrF   rQ   l2_normModulerR   r"   r"   r"   r#   <module>   sX   	
3	
