o
    پiB                     @   s  d dl Z d dlZd dlZd dlZdejdejdejddfddZdejd	ejd
ejddfddZdejdejdejddfddZdejd	ejd
ejddfddZdejdejdejddfddZ	dejd	ejd
ejddfddZ
dZdZdZee ZdZdZee ZdZejZg dZejdededdfddZejdededdfddZg dZg d Zejd!ee eed"ed#eddfd$d%Zejd!ee eed"ed#eddfd&d'Z e!d(kre"e#g dS dS ))    Nkk_nopek_ropereturnc                 C   sR   |j d }|| ddddd|f< |d| j d d| dddd|df< dS )z2Reference PyTorch implementation for concat_mla_k.N   )shapeexpand)r   r   r   nope_head_dim r   [/home/ubuntu/.local/lib/python3.10/site-packages/sglang/jit_kernel/tests/test_concat_mla.pytorch_concat_mla_k   s   
.r   aboutc                 C   sB   | j d }| |ddddd|f< ||dddd|df< dS )z9Reference PyTorch implementation for concat_mla_absorb_q.r   N)r   )r   r   r   
a_last_dimr   r   r   torch_concat_mla_absorb_q   s   
r   c                 C      ddl m} || || dS )'AOT compiled sgl_kernel implementation.r   concat_mla_kN)
sgl_kernelr   r   r   r   r   r   r   r   sgl_kernel_concat_mla_k!      r   c                 C   $   ddl m} || |}|| dS )r   r   concat_mla_absorb_qN)r   r   copy_r   r   r   r   resultr   r   r   sgl_kernel_concat_mla_absorb_q*      
r!   c                 C   r   )zJIT compiled implementation.r   r   N)sglang.jit_kernel.concat_mlar   r   r   r   r   jit_concat_mla_k4   r   r$   c                 C   r   )z=JIT compiled implementation - wrapper for test compatibility.r   r   N)r#   r   r   r   r   r   r   jit_concat_mla_absorb_q=   r"   r%      @      cuda)r                   r'   r&      r(   i   
num_tokensc                 C      t j| ttttd}t j| ttttd}t j| ttttd}t j| dtttd}t	||| t
||| tjj||ddd dS )*Test JIT kernel against PyTorch reference.devicedtyper   r   atolrtolN)torchemptyNUM_LOCAL_HEADS
K_HEAD_DIMDEVICEDTYPErandnQK_NOPE_HEAD_DIMQK_ROPE_HEAD_DIMr   r$   tritontestingassert_close)r0   k_jitk_torchr   r   r   r   r   test_concat_mla_k_jit_vs_torchX      


rG   c                 C   r1   );Test JIT kernel against AOT kernel for bitwise equivalence.r3   r   r   r6   N)r9   r:   r;   r<   r=   r>   r?   r@   rA   r   r$   rB   rC   rD   )r0   rE   k_aotr   r   r   r   r   test_concat_mla_k_jit_vs_aotm   rH   rK   )r   r*   r+   r,   r-   r.   )r   r*   r+   r,   r-   r&   zdim_0,dim_1dim_0dim_1c                 C      t j| |tttd}t j| |tttd}t j| |tttd}t j| |tttd}t||| t	||| t
jj||ddd dS )r2   r3   r   r6   N)r9   r?   
A_LAST_DIMr=   r>   
B_LAST_DIMr:   OUT_LAST_DIMr   r%   rB   rC   rD   )rL   rM   r   r   out_jit	out_torchr   r   r   %test_concat_mla_absorb_q_jit_vs_torch      rT   c                 C   rN   )rI   r3   r   r6   N)r9   r?   rO   r=   r>   rP   r:   rQ   r!   r%   rB   rC   rD   )rL   rM   r   r   rR   out_aotr   r   r   #test_concat_mla_absorb_q_jit_vs_aot   rU   rW   __main__)$	itertoolspytestr9   rB   Tensorr   r   r   r!   r$   r%   r;   r@   rA   r<   rO   rP   rQ   r=   bfloat16r>   NUM_TOKENS_LISTmarkparametrizeintrG   rK   
DIM_0_LIST
DIM_1_LISTlistproductrT   rW   __name__main__file__r   r   r   r   <module>   s    


	


	
