o
    پi                     @   s&   d dl mZ d dlZG dd dZdS )    )OptionalNc                   @   sZ   e Zd ZdZg dZeg dZedej	de
e defddZed	ejfd
dZdS )MXFP4QuantizeUtilg      @)r   g      ?   g      ?            )g      ?g      ?g      ?g      ?g      @g      @   input
block_sizereturnc                    s    fdd}dd }|du rd}|j }|j}|d|}| jddd	j}| j }tjd
|j	d}	t
tt||	}
|t|
 |}||}||}|
d tj}
 ||||
fS )a  Converting a tensor to a quantized format based on MXFP4 quantization. Only E4M3 is supported.
        Args:
            input (torch.Tensor): The input tensor to be quantized.
            block_sizes (dict | None): The block sizes for quantization.
        c                    sX   t | }d| d }t j|  d j| j dkdd}|d | t j}|S )Nr   r   )dim   )	torchsignsumabs	unsqueezeE2M1_boundstodeviceuint8)xr   sign_bitord_fp4_valcls _/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/layers/quantization/mxfp4_tensor.pycast_fp4$   s   
 z,MXFP4QuantizeUtil.quantize.<locals>.cast_fp4c                 S   sR   | ddd df }| ddd df }|  d> }|dd |jd f  |7  < |S )N.r   r   r   r   r   )cloneshape)r   	left_side
right_sidenew_datar   r   r    fuse_uint4_to_uint8-   s   
z7MXFP4QuantizeUtil.quantize.<locals>.fuse_uint4_to_uint8N    r   T)r   keepdimg     _r      )r#   dtypeviewr   maxvaluesE2M1_maxr   tensorr   ceilmaximumlog2exp2r   r   )r   r
   r   r!   r'   original_shapeoriginal_dtype
input_amaxdescale	min_value
e8m0_scaleinput_qr   r   r    quantize   s    	
zMXFP4QuantizeUtil.quantizer,   c                 C   s   dd }|}|d }||}dd|d@ d?  tj  }	|d@ }
|
 tj}
tj| j|jd	}|
j}||
d |}|		 | }|d|}t
|	 d
 }|dd}|| }|| |S )z0Dequantze MXFP4 packed tensor to a target dtype.c                 S   sj   | d@ }| d? d@ }t | j}|d d |d< tj|tj| jd}||ddddf< ||dd	ddf< |S )
zxUnfuse uint8 values back to uint4 values.
            This is the inverse operation of fuse_uint4_to_uint8.
               r   r   r   )r,   r   .r   Nr   )listr#   r   zerosr   r   )r   r$   r%   r#   resultr   r   r    unfuse_uint8_to_uint4P   s   
z;MXFP4QuantizeUtil.dequantize.<locals>.unfuse_uint8_to_uint4r   r   r   r   r      r*   r+   )r   r   float32longr1   E2M1_valuesr   r#   reshapefloatr5   )r   quantized_datar,   scaleblock_sizesrB   r;   r   	x_unfusedr   	magnituder/   r6   x_floatscale_factorr   r   r    
dequantizeL   s$   zMXFP4QuantizeUtil.dequantizeN)__name__
__module____qualname__r0   rF   r   r1   r   classmethodTensorr   inttupler=   r,   rP   r   r   r   r    r      s    /r   )typingr   r   r   r   r   r   r    <module>   s   