o
    پi                     @   s   d dl Z d dlZd dlZd dlZd dlmZ dejdejdee	e
f fddZddee	ee	e
f f fd	d
Zg dZejdedd ZdS )    N)KVFP4QuantizeUtiloriginalreconstructedreturnc                 C   s   t | | d  }t t | |  }t t |  }|dkr1dt|t|  ntd}t t | | t | d   }||||dS )zFCalculate accuracy metrics between original and reconstructed tensors.   r      infg:0yE>)MSEMAEPSNRzRelative Error)	torchmeanitemabsmaxnplog10sqrtfloat)r   r   msemaemax_valpsnr	rel_error r   X/home/ubuntu/.local/lib/python3.10/site-packages/sglang/test/test_kvfp4_quant_dequant.pycalculate_accuracy_metrics   s   (r   d   c                 C   st  t j| ||t jdd}tdD ]}|d }qt j  t }t|D ]}|t j}q#t j  t | | }t }t|D ]}|t j}	qAt j  t | | }
t	||	}t
|\}}t
||}t }t|D ]	}t
|\}}qqt j  t | | }t }t|D ]}t
||}qt j  t | | }t	||}||
d|||d|dS )z;Run FP8 vs KVFP4 quantization benchmark and return metrics.cuda)dtypedevice   r   )
quant_timedequant_time)fp8fp4)r   randnbfloat16ranger   synchronizetimetofloat8_e4m3fnr   r   batched_quantizebatched_dequantize)mnknum_runstensor_bf16_start
tensor_fp8fp8_quant_timetensor_fp8_dequantfp8_dequant_timefp8_metrics
tensor_fp4scale_factorsfp4_quant_timetensor_fp4_dequantfp4_dequant_timefp4_metricsr   r   r   run_benchmark   sR   







rA   )
)@      @  )   rC   rD   )   rC   rD   )   rC   rD   )+ rC   rD   )rB      rB   )rE   rI   rB   )rF   rI   rB   )rG   rI   rB   )rH   rI   rB   zm,n,kc                 C   sp   t d|  d| d| d t| ||}t d|d  t d|d  |d d d	k s,J |d d d	k s6J d
S )z4Benchmark FP8 vs KVFP4 for predefined tensor shapes.z*
=== Running benchmark for tensor shape: [z, z] ===zFP8:r$   zFP4:r%   r	   g      ?N)printrA   )r/   r0   r1   resultsr   r   r   test_kvfp4_quant_dequanti   s   rL   )r   )r*   numpyr   pytestr   +sglang.srt.layers.quantization.kvfp4_tensorr   Tensordictstrr   r   rA   MNK_FACTORSmarkparametrizerL   r   r   r   r   <module>   s    

 <