o
    پi
                     @   sl  d dl Z d dlZd dlZd dlZdejdejdejdejddf
ddZdejdejdejdejddf
d	d
Zdejdejdejdejddf
ddZe 	d#dejdejdejdejde	ddfddZ
dd ed dD Zedd eeD 7 ZddgZddgZg dZdZejZejdee eeeedededededdf
d d!Zed"kreeg dS dS )$    Nqkq_weightk_weightreturnc                 C   sN   ddl m} | jd }| d|} |d|}|| || d ||||d d S )Nr   rmsnormout)
sgl_kernelr   shapeview)r   r   r   r   r   head_dim r   W/home/ubuntu/.local/lib/python3.10/site-packages/sglang/jit_kernel/tests/test_qknorm.pysglang_aot_qknorm   s   
r   c                 C   s   ddl m} || ||| d S )Nr   )fused_inplace_qknorm)sglang.jit_kernel.normr   )r   r   r   r   r   r   r   r   sglang_jit_qknorm   s   r   c                 C   s,   ddl m} || || d ||||d d S )Nr   r   r
   )flashinfer.normr   )r   r   r   r   r   r   r   r   flashinfer_qknorm"   s   r   ư>epsc           	      C   s   |   djddd}|  djddd}||  }||  }| |   | |    ||  | |    d S )N   r	   T)dimkeepdim)floatpowmeanrsqrtcopy_)	r   r   r   r   r   q_meank_meanq_normk_normr   r   r   torch_impl_qknorm.   s   r&   c                 C   s   g | ]}d | qS )r   r   ).0nr   r   r   
<listcomp>>   s    r)      c                 C   s   g | ]
\}}|d  | qS )   r   )r'   ixr   r   r   r)   ?   s    r            )@         i   i   cudazbatch_size,n_k,n_q,head_dim
batch_sizen_kn_qr   c           
      C   s   t j| ||ttd}t j| ||ttd}t j|ttd}t j|ttd}| | f}| | f}	t|d |d || t|	d |	d || tjj	|d |	d ddd tjj	|d |	d ddd d S )N)devicedtyper   r+   g{Gz?)atolrtol)
torchrandnDEVICEDTYPEcloner   r   tritontestingassert_close)
r5   r6   r7   r   r   r   r   r   q_k_aotq_k_jitr   r   r   test_qknormK   s    rF   __main__)r   )	itertoolspytestr<   rA   Tensorr   r   r   compiler   r&   rangeBS_LIST	enumerateN_K_LISTN_Q_LISTHEAD_DIM_LISTr>   bfloat16r?   markparametrizelistproductintrF   __name__main__file__r   r   r   r   <module>   s    


 