o
    پi`                     @   s\   d dl Z dZe j rdndZe jg de jedZe jg de jedZG dd	 d	Z	dS )
    Ng      @cudacpu)r   g      ?   g      ?            dtypedevice)g      ?g      ?g      ?g      ?g      @g      @   c                   @   sl   e Zd ZdZeejdejdeejejf fddZ	eejej
fdejdejdejdejfd	d
ZdS )KVFP4QuantizeUtilzCUtility class for MXFP4 quantization and dequantization operations.tensorreturnc                 C   s   | j \}}}| ||| d d}| jdddj}tttj|t	 dd}|d 
dtj}|t| }|dk tjd	> }	| }
tj|
dtkdd
}|	|tj }||||}|ddddf d> |ddddf  }||fS )a  
        Quantize tensor to KVFP4 format
        Args:
            tensor: Input tensor of shape [B, M, N]

        Returns:
            quant_tensor: Quantized tensor of shape [B, M, N/2]
            scale_factors: Scale factors of shape [B, M*N/16]
           T)dimkeepdimg|=)min   r   r   )r   .r   Nr   r   )shapeviewabsmaxvaluestorchceillog2clampE2M1_MAXsqueezetouint8exp2sum	unsqueezeE2M1_BOUNDS)r   bmnreshaped	block_max	scale_expscale_factorsscaled	sign_bitsabs_valsmagnitude_bitsfp4_valsfp4_reshapedpacked r5   _/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/layers/quantization/kvfp4_tensor.pybatched_quantize   s   (z"KVFP4QuantizeUtil.batched_quantizequant_tensorr-   r
   c                 C   s   | j \}}}|d }tj|||tj| jd}| d@ |ddddf< | d? d@ |ddddf< |d	@ dk}|d
@ }	t|	  }
t||
 |
}
|
||| d d}|	 d }|t
|d }|||||S )a$  
        Dequantize KVFP4 tensor
        Args:
            quant_tensor: Quantized tensor of shape [B, M, N/2]
            scale_factors: Scale factors of shape [B, M*N/16]
            dtype: Target dtype for output

        Returns:
            Dequantized tensor of shape [B, M, N]
        r   r	      .r   Nr   r         r   r   r   )r   r   emptyr"   r   E2M1_VALUESlongwherer   floatr#   r%   r!   )r8   r-   r
   r'   r(   n_halfr)   r2   	sign_maskmagnitude_idx
float_valsr*   r,   r.   r5   r5   r6   batched_dequantizeH   s   z$KVFP4QuantizeUtil.batched_dequantizeN)__name__
__module____qualname____doc__staticmethodr   compileTensortupler7   bfloat16r
   rE   r5   r5   r5   r6   r      s"    $'r   )
r   r   r   is_available_devicer   float32r=   r&   r   r5   r5   r5   r6   <module>   s   