o
    پi                     @   s   d dl Z d dlmZ d dlmZmZ d dlZddlmZ ddl	m
Z
 ddlmZmZmZ dd	d
Ze jdd Ze			ddejdededee deejejf f
ddZe	ddejdejdedejfddZdS )    N)SimpleNamespace)OptionalTuple   )flashinfer_api)#gen_mxfp8_quantization_sm100_module)device_support_pdlregister_custom_opregister_fake_op   c                 C   s,   | | d | | }|d d d }|| S )Nr          )	total_rowtotal_columnrow_size
padded_rowpadded_columnr   r   O/home/ubuntu/.local/lib/python3.10/site-packages/flashinfer/fp8_quantization.py _compute_swizzled_layout_sf_size   s   r   c                     s   t    tddd			 ddtjdtdtd	tt d
ttjtjf f
 fdd} t	d		ddtjdtdtd
ttjtjf fdd}tddd	ddtjdtjdtd
tjf fdd}t	d	ddtjdtjdtd
tjfdd}t
| |dS )Nz flashinfer::mxfp8_quantize_sm100 )mutates_argsT    inputis_sf_swizzled_layout	alignment
enable_pdlreturnc           
         s<  | j jdkr?tj| jtj| j d}|r"t| jd | jd d d}n|  d }tj|ftj| j d} | ||| ||fS |du rHt	| j }|  | jd  }| jd }|| d | | }	tjg | jdd |	R tj
| j d}|rt||	d d}n||	 d }tj|ftj| j d} | ||||| ||fS )	aK  Quantize input tensor to MxFP8 format.

        Args:
            input (torch.Tensor): Input tensor of shape [M, K] with dtype fp16/bf16/fp8_quantized.
            is_sf_swizzled_layout (bool, optional): Whether to use swizzled layout for scale factors. Defaults to True.
            alignment (int, optional): sfVecSize. Defaults to 32. Note that alignment is not used in the host kernel.
            enable_pdl (Optional[bool], optional): Whether to enable PDL (Programmatic Dependent Launch).
                If None, automatically detects based on device capability. Defaults to None.
        Returns:
            Tuple[torch.Tensor, torch.Tensor]: A tuple containing:
                - Quantized tensor of shape [M, K] with dtype FLOAT8_E4M3
                - Scale factors tensor with shape determined by layout and sf_vec_size
        cpudtypedevicer   r   r   r   N)r!   typetorchemptyshapeuint8r   numelmxfp8_quantize_hostr   float8_e4m3fnmxfp8_quantize)
r   r   r   r   out_valout_sf_sizeout_sfmkpadded_kmoduler   r   mxfp8_quantize_sm100   sL   

zAget_mxfp8_quantization_sm100_module.<locals>.mxfp8_quantize_sm100c                 S   s8   | j \}}| j||gtjd| j|| d gtjdfS )Nr    r   )r&   	new_emptyr$   int64int32)r   r   r   r/   r0   r   r   r   _fake_mxfp8_quantize_sm100[   s   
zGget_mxfp8_quantization_sm100_module.<locals>._fake_mxfp8_quantize_sm100z'flashinfer::mxfp8_dequantize_host_sm100)r   scale_tensorc                    s*   t j| jt j| jd} | ||| |S )a  Dequantize input tensor from MxFP8 format.

        Args:
            input (torch.Tensor): Input tensor of shape [M, K] with dtype FLOAT8_E4M3.
            scale_tensor (torch.Tensor): Scale factors tensor with shape determined by layout and sf_vec_size.
            is_sf_swizzled_layout (bool, optional): Whether to use swizzled layout for scale factors. Defaults to True.

        Returns:
            torch.Tensor: Dequantized float tensor of shape [M, K] with dtype float32.
        r   )r$   r%   r&   float32r!   mxfp8_dequantize_host)r   r:   r   outr2   r   r   mxfp8_dequantize_host_sm100g   s   zHget_mxfp8_quantization_sm100_module.<locals>.mxfp8_dequantize_host_sm100c                 S   s    | j | jd | jd gtjdS )Nr   r   r5   )r6   r&   r$   r;   r   r:   r   r   r   r   !_fake_mxfp8_dequantize_host_sm100   s    zNget_mxfp8_quantization_sm100_module.<locals>._fake_mxfp8_dequantize_host_sm100)r4   r>   Tr   N)Tr   T)r   build_and_loadr	   r$   Tensorboolintr   r   r
   r   )r4   r9   r>   r@   r   r2   r   #get_mxfp8_quantization_sm100_module   sx   
=rG   Tr   r   r   r   r   r   c                 C   sJ   d}| j d | dksJ |du rt| j}t | |||\}}||fS )a  Quantize input tensor to MxFP8 format.

    This function implements MxFP8 quantization that converts input tensors to a compressed MxFP8 format
    with associated scale factors. It supports various input data types and scale factor layouts.

    Args:
        input (torch.Tensor): Input tensor of shape [M, K] with dtype fp16/bf16/fp8_quantized.
        is_sf_swizzled_layout (bool, optional): Whether to use swizzled layout for scale factors. Defaults to True.
        alignment (int, optional): sfVecSize. Defaults to 32.
        enable_pdl (Optional[bool], optional): Whether to enable PDL (Programmatic Dependent Launch).
            If None, automatically detects based on device capability. Defaults to None.
    Returns:
        Tuple[torch.Tensor, torch.Tensor]: A tuple containing:
            - Quantized tensor of shape [M, K] with dtype FLOAT8_E4M3
            - Scale factors tensor with shape determined by layout and sf_vec_size
    r   r"   r   N)r&   r   r!   rG   r4   )r   r   r   r   sf_vec_sizex_qsfr   r   r   r+      s   
r+   r:   c                 C   s   t  | ||S )av  Dequantize input tensor from MxFP8 format.

    This function performs dequantization by converting a packed FP8 tensor in MxFP8 format
    back to float values using the associated scale factors.

    Args:
        input (torch.Tensor): Packed FP8 tensor in MxFP8 format of shape [M, K] with dtype FLOAT8_E4M3.
        scale_tensor (torch.Tensor): Scale factors tensor with shape determined by layout and sf_vec_size.
        is_sf_swizzled_layout (bool, optional): Whether scale factors use swizzled layout. Defaults to True.

    Returns:
        torch.Tensor: Dequantized float tensor of shape [M, K] with dtype float32.

    )rG   r>   r?   r   r   r   r<      s
   r<   )r   rA   rB   )	functoolstypesr   typingr   r   r$   api_loggingr   jit.fp8_quantizationr   utilsr   r	   r
   r   cacherG   rD   rE   rF   r+   r<   r   r   r   r   <module>   sH    

{$