o
    پiM                     @   s   d dl Z d dlZd dlmZmZ d dlmZmZ e Ze Z	e Z
e
r&ejnejZe jdejejgdddZes=e	r]e jdejejgddd	Ze jdejejgdd
dZedkrie eg dS dS )    N)is_fp8_fnuzscaled_fp8_quant)is_cudais_hipdtypereturnc                 C   s   dd }dd }t jdddd | }t|d \}}|||}t j|| t j|||| ||||  t||\}}|||}t j|| t j|||| ||||  d S )	Nc                 S   >   t t}| }| t j| j|j|jd}|t}|S N)minmax	torchfinfo	fp8_dtype
reciprocaltofloat32clampr
   r   tensor	inv_scaler   scaleqweight r   O/home/ubuntu/.local/lib/python3.10/site-packages/sglang/test/test_custom_ops.pyquantize_ref_per_tensor   s
   

zAtest_scaled_fp8_quant_per_tensor.<locals>.quantize_ref_per_tensorc                 S      |  |}|| }|S Nr   r   r   r   fake_qweight	dq_weightr   r   r   dequantize_per_tensor      
z?test_scaled_fp8_quant_per_tensor.<locals>.dequantize_per_tensor)   r$   cudasizedevice   r   randnr   r   testingassert_close)r   r   r"   xyr   ref_y_r   r   r    test_scaled_fp8_quant_per_tensor   s"   	





r2   c                 C   st   dd }dd }t jdddd | }t|d d	d
\}}|||}t j|| t j|||| ||||  d S )Nc                 S   r   r	   r   r   r   r   r   quantize_ref_per_token<   s   

zGtest_scaled_fp8_quant_per_token_dynamic.<locals>.quantize_ref_per_tokenc                 S   r   r   r   r   r   r   r   dequantize_per_tokenG   r#   zEtest_scaled_fp8_quant_per_token_dynamic.<locals>.dequantize_per_token)r$      r%   r&   r)   Tuse_per_token_if_dynamicr*   )r   r3   r4   r.   r/   r   r0   r   r   r   'test_scaled_fp8_quant_per_token_dynamic:   s   


r8   c                 C   sr  d}t j|dfddd | }d}t|d |d\}}|jd |ks$J |jd	 |jd	 ks0J t|d \}}t j|d | | t|d \}}	t||	|d\}
}|
jd |ks[J |
jd	 |jd	 ksgJ t||	\}}t j|
d | | t|d |d
d\}}|jd |ksJ |jd	 |jd	 ksJ t|d d
d\}}t j|d | | t j|d | | d S )N   r5   r%   r&   r)   
   )num_token_paddingr      T)r;   r7   r6   )r   r+   r   r   shaper,   r-   )r   original_rowsr.   padding_size	y_dynamicscale_dynamicy_without_paddingscale_without_paddingr1   r   y_staticy_static_without_paddingy_per_tokenscale_per_tokeny_per_token_without_paddingscale_per_token_without_paddingr   r   r   "test_scaled_fp8_quant_with_paddingZ   s<   


rJ   __main__)r   N)pytestr   )sglang.srt.layers.quantization.fp8_kernelr   r   sglang.srt.utilsr   r   _is_cuda_is_hip_is_fp8_fnuzfloat8_e4m3fnuzfloat8_e4m3fnr   markparametrizefloat16bfloat16r2   r8   rJ   __name__main__file__r   r   r   r   <module>   s$   (7