o
    پi4	                     @   s   d dl mZmZmZ d dlZd dlmZmZ 		ddejdejdee	 dejdeej d	eej d
ejfddZ
ejfdejdejd
eejejf fddZdejdejdee	 d
ejfddZdS )    )ListOptionalTupleN)per_token_group_quant_int8w8a8_block_int8_matmulinputweight
block_sizeweight_scaleinput_scalebiasreturnc                 C   s   |d u sJ |  d| jd }g | jd d |jd }t||d \}}	t|||	||| jd}
|d ur9|
| }
|
j| jdj | S )Nr      )output_dtype)dtype)viewshaper   r   r   to)r   r   r	   r
   r   r   input_2doutput_shapeq_inputx_scaleoutput r   ]/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/layers/quantization/int8_utils.pyapply_w8a8_block_int8_linear   s   r   xr   c           
      C   st   t |}|  \}}t | | jdd}|j|j}}|| }| | j||d}	|	|	 |
  fS )zRThis function quantizes input values to int8 values with tensor-wise quantization.g-q=)min)r   max)torchiinfoaminmaxmaximumabsclampr   r   r   
contiguousfloat
reciprocal)
r   r   r!   min_valmax_valamaxint8_minint8_maxscale	x_scl_satr   r   r   input_to_int8"   s   
r0   	x_q_blockx_sc                 C   s   |d |d }}| j \}}|| d | }|| d | }||j d ks'J ||j d ks0J | tj}	t|D ]-}
t|D ]&}|	|| t|d | ||
| t|
d | |f  || |
 9  < q@q:|	S )zThis function conducts block-wise dequantization.
    The inputs are block-wise quantization tensor `x_q_block`, block-wise quantization scale
    and the block size.
    The outputs are dequantized tensor.
    r   r   )r   r   r    float32ranger   )r1   r2   r	   block_nblock_knkn_tilesk_tiles
x_dq_blockijr   r   r   block_dequant/   s$   


r>   )NN)typingr   r   r   r    *sglang.srt.layers.quantization.int8_kernelr   r   Tensorintr   int8r   r0   r>   r   r   r   r   <module>   sJ    

