o
    پi                     @   s  d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ z
d dlmZ dZW n ey=   dZY nw e dd	 d
kpQe dd	 d
kZdZdZdZejZejeefejddZeeeedd\ZZZZZ Z!e
ej"Z#dd Z$dd Z%erg dZ&ng dZ&erddgZ'ddgZ(ddgZ)n	dgZ'dgZ(dgZ)ej*+ej*j,dge&de'e(e)dd i d!	d"d# Z-e.d$kre%  e-j/dd% dS dS )&    N)scalar_types)gptq_marlin_gemm)marlin_make_workspace)marlin_quantizeTFCIfalsetrueGITHUB_ACTIONSi      cudadtypedevice)	act_orderc                 C   s0   | |d t td d tttt|jd ttdddddS )Nr   TF)	is_k_fulluse_atomic_adduse_fp32_reduceis_zp_float)	_marlin_q_w	_marlin_s_g_idx_sort_indices
_workspace
QUANT_TYPEshapeSIZE_NSIZE_K)fna r   a/home/ubuntu/.local/lib/python3.10/site-packages/sglang/jit_kernel/benchmark/bench_gptq_marlin.py	_run_gemm&   s&   r!   c                  C   sZ   t std d S tjdtftjdd} tt| }tt| }tj	j
||ddd td d S )Nz8sgl_kernel AOT not available, skipping correctness check   r   r   gMbP?)rtolatolz%Correctness check passed (JIT vs AOT))AOT_AVAILABLEprinttorchrandnr   float16r!   jit_gptq_marlin_gemmaot_gptq_marlin_gemmtestingassert_close)r   out_jitout_aotr   r   r    check_correctness<   s   

r0   )   r"   r
   )
r1            r"       @   r
      i   jitaotz
JIT Kernelz
AOT Kernel)blue-)greenr;   size_mprovideruszgptq-marlin-gemm-performance)	x_namesx_valsline_arg	line_vals
line_namesstylesylabel	plot_nameargsc                    s   t d}t j| tft j|d g d}|dkr fdd}n|dkr* fdd}ntd	| tjj||d
\}}}d| d| d| fS )Nr   r   )g      ?g?g?r8   c                      
   t t S N)r!   r*   r   r   r   r    <lambda>j      
 zbenchmark.<locals>.<lambda>r9   c                      rI   rJ   )r!   r+   r   rK   r   r    rL   l   rM   zUnknown provider: )	quantilesi  )	r'   r   r(   r   r)   
ValueErrortritonr,   do_bench_cudagraph)r=   r>   r   rN   r   msmin_msmax_msr   rK   r    	benchmarkV   s   
rU   __main__)
print_data)0osr'   rP   triton.testingsgl_kernel.scalar_typer   sglang.jit_kernel.gptq_marlinr   r*   +sglang.srt.layers.quantization.marlin_utilsr   sglang.test.test_marlin_utilsr   
sgl_kernelr+   r%   ImportErrorgetenvlowerIS_CIr   r   
GROUP_SIZEuint4b8r   r(   r)   	_b_weight_w_refr   r   r   r   _r   r   r!   r0   m_rangerC   rD   rE   r,   perf_report	BenchmarkrU   __name__runr   r   r   r    <module>   sp    



