o
    پi8                  	   @   s  d dl mZ d dlZd dlZd dlm  mZ ddlm	Z	 ddl
mZmZmZ G dd deZejfdejd	ejd
edefddZdejdejdejd
efddZdd ZdejfddZejfdejd	ejd
edefddZdd Zdejdejdejd
efddZeZdS )    )EnumN   )_upcast_from_mxfp)_downcast_to_mxfpMXFP_BLOCK_SIZE_quantize_mxfp8_fnc                   @   s   e Zd ZdZdZdS )DequantScaleRoundingModer   r   N)__name__
__module____qualname__ROUND_UP
ROUND_DOWN r   r   X/home/ubuntu/.local/lib/python3.10/site-packages/triton_kernels/numerics_details/mxfp.pyr      s    r   
src_tensorout_quant_typeaxisDEQUANT_SCALE_ROUNDING_MODEc                 C   s  | j }| |  kr|k sn J d||dkr|n|| }| || j d } |tjk}|tjtjfv }|s<|s<J |r@dnd}| jd }|rV|d dksVJ d| | jdd || f }	| jdd t|t	f }
| j
|	|d}| j
|
tjd}|  dkr| d| jd }|d|jd }|d|jd }d	}t	j}t|jd |}t|jd |}t||f |g| || || |j|||jR d
di ||| j d }||| j d }||fS )a  
         Convert the src weights to mx format. The src weight is quantized along the axis dimension.

         If weight_quant_type is torch.uint8, we output mxfp4 where two e2m1 values are packed into a single byte.
         Note that this means the k_dim of the tensor will be half of the logical k_dim.

         If weight_quant_type is torch.float8_e4m3fn or torch.float8_e5m2, we output mxfp8 with the float8s are stored
         in their respective formats.
    Invalid axis axis=r   r      z.axis dim must be divisible by 2 for e2m1. Got Ndtype   	num_warps   )ndim	transposetorchuint8float8_e4m3fnfloat8_e5m2shapetritoncdivr   	new_emptynumelreshapeviewvaluer   stride)r   r   r   r   r   is_fp4is_fp8divisorL	out_shapeout_scale_shapeout_quant_tensor	out_scalekernel_src_tensorkernel_quant_tensorkernel_scaleBLOCK_OUT_DIMBLOCK_QUANT_DIMgrid_out
grid_quantr   r   r   downcast_to_mxfp   sV   &

r:   tensorscaletarget_dtypec                 C   s  | j }| |  kr|k sn J d||dkr|n|| }| j |j ks2J d| j d|j | jtjtjtjhv sEJ d| j|jtjksSJ d|j|tjtjtjfv sdJ d|| j	| | jtjkrpdnd	 }| 
|| j d	  } |
||j d	  }tjg | j	d
d |R || jd}|d|j	d }| d| j	d }|d|j	d }	d}
tj}t|j	d |
}t|j	d	 |}t||f |g| |	|	 || |j	|
|R ddi |
||j d	  }|S )aC  
    Upcasts an mxfp (packed) weight tensor back to float16 or bfloat16.

    The function assumes that the tensors were quantized along the given axis.
    It permutes the tensor so that the quantized axis is last, reshapes to 2D,
    launches the Triton upcast kernel, and then unpermutes back to the original order.
    r   r   zJWeight and scale must have the same number of dimensions. Got tensor.ndim=z and scale.ndim=z"Invalid tensor dtype tensor.dtype=z Invalid scale dtype scale.dtype=z"Invalid output dtype target_dtype=r   r   Nr   r   devicer   r   r   )r   r   r   r   r!   r    float16bfloat16float32r"   r   
contiguousemptyr?   r(   r   r)   r#   r$   r   r*   )r;   r<   r=   r   r   logical_quant_dimoutreshaped_outreshaped_tensorreshaped_scaler6   r7   blocks_out_dimblocks_quant_dimr   r   r   upcast_from_mxfpE   sR   &
"&rL   c                 C   s   | |? dd| > d @ S )Nr       r   )xshiftr   r   r   right_shift_unsignedn   s   rP   r   c                 C   s*   t jdt jdt jdi}| |v sJ ||  S )Ng      @g      @g      |@)r   r   r!   r    )r   dr   r   r   get_max_quant_vals   s   rR   c           )   	   C   s  | j }| |  kr|k sn J d|| jtjtjtjhv s)J d| j |dkr/|n|| }|tjk}dt|v }|sI|sIJ d| | j}|r]| 	|}|d dks]J d| 
|| j d tj}	|	jd }t|tt }
|
| }t|	d|f}ttj|	tjd	d|f}|	d}t|}t||tjd
||jd}|jdd |t tf }|j| }|jddd\}}t|}|| }|tj}|tjkr|d d@ }n|d@ }|tj}t|dktjd|dd| }|j}|j| }|| }||}|dd|f }|r"t|| |}||}n|s,J d| | tj}|d@ }t|dd@ } |d@ }!d}"d}#t| |"k dt|!dB |"|  d ? |!}!t | tj|"|# |d|"|#  } t| d> t|!dB d d}$t!|$tjd|d}$t|d|$B tj}%|%jg |%jdd |d dR  }%|%d }&|%d }'|&|'d > B }|j|j d? tj}(|("d}(|
|| j d }|(
|| j d }(||(fS )!u  
    Converts the src tensor to the output format specified by out_quant_type.
      axis: The axis along which the tensors are contiguous and quantization is applied.
      DEQUANT_SCALE_ROUNDING_MODE: 0 for ROUND_UP, 1 for ROUND_DOWN.

    Returns:
      out_quant_tensor: Quantized tensor in mx format.
         • For mxfp8, the output has the same shape as src_tensor.
         • For mxfp4, the size along the axis is halved, and the tensor is returned as a torch.uint8.
      scale: Scale tensor (stored as uint8) computed per group of 32 elements along the axis.
             Its shape is the same as src_tensor except that the axis is replaced by ceil(L/32),
             where L is the original length along that axis.
    r   zInvalid input tensor dtype r   float8r   z=For mxfp4 conversion the contiguous axis length must be even.r   r   r   g      )r?   r   NT)dimkeepdimi i          )r?   g      ?.z!Invalid output quantization type l                 i  @          ).r   ).r      )#r   r   r   rB   rA   r@   r   strr?   sizer   tor"   r#   r$   r   Fpad	ones_likeboolabswherer;   r(   maxrR   int32r   r   clamprC   rP   maximumminimumsqueeze))r   r   r   r   r   r+   r,   r?   
axis_shapesrcnext_multiple
pad_amount
padded_src
valid_maskpadded_axis_shapeabs_f	new_shape
abs_groupsmax_val_max_quant_valdequant_scaleds_intds_int_roundeddequant_scale_roundedquant_scaleorig_padded_shapepadded_src_groupsquant_tensor
out_weightq_intsigns	exponents	mantissasE8_BIASE2_BIASe2m1_tmp
e2m1_valueevensoddsdq_scaler   r   r   downcast_to_mxfp_torchy   s   &







 


"&
r   c                 C   s   | j tjksJ | tj} | d@ }| d? d@ }g d}tj|tj| jd}t|| g}|| }|| }tj	||gdd}|j
g | jd d dR  }|S )N   r]   )rV   g      ?r   g      ?r      r]      r>   r   )rT   )r   r   r   r`   rh   r;   rB   r?   catstackr(   r"   )input_tensorr   r   valsoutputseven_floats
odd_floatsoutput_tensorr   r   r   cvt_e2m1_to_fp32   s   r   c                 C   s  | j }| |  kr|k sn J d|| jtjkp!| jtjk}|s2| jtjks2J d| j |dkr8|n|| }|||j d }| || j d } |tjd> 	tj
}| jtjkret| }n| tj
}| jd | jtjkrwdnd }|d}	t|tt }
|
|	 }t|d|f}|jd }|jdd |t tf }|j	| }|d}|| }|j	g |jdd |R  }|d	d|	f }|| }||| j d }|S )
z
    Converts the mxfp4/mxfp8 tensor to the target format specified by target_dtype.
      axis: The axis along which dequantization is applied.

    Returns:
      out_weight: Tensor in the target format.
    r   z Invalid input quantization type r   r   rW   r   r   N.)r   r   r   r    r!   r   r   r`   rh   r(   rB   r   r"   r_   r#   r$   r   ra   rb   	unsqueezerC   )r;   r<   r=   r   r   r,   r   fp32_tensorrE   rm   rs   pad_sizepadded_tensornew_axis_shaperu   dq_scale_padded
out_padded
out_tensorr   r   r   upcast_from_mxfp_torch   s4   	& 




r   )enumr   r#   r   torch.nn.functionalnn
functionalra   mxfp_details._upcast_from_mxfpr   mxfp_details._downcast_to_mxfpr   r   r   r   r   Tensorr   intr:   rL   rP   rR   r   r   r   quantize_mxfp8_fnr   r   r   r   <module>   s*   
 1)
u /