o
    ܹi-                     @   s,   d Z ddlZG dd dZG dd dZdS )aN  
Helper classes for working with low precision floating point types that
align with the opencompute (OCP) microscaling (MX) specification.
  * MXFP4Tensor: 4-bit E2M1 floating point data
  * MXScaleTensor: 8-bit E8M0 floating point data
Reference: https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf
    Nc                   @   s>   e Zd ZdddZdd Zdd Zdd	 Zd
d Zdd ZdS )MXFP4TensorNc                 C   j   || _ |durt|tjsJ d|j | _ | || _dS |dur1t|tr+|| _dS |f| _dS td)at  
        Tensor class for working with four bit E2M1 floating point data as defined by the
        opencompute microscaling specification.


        Parameters:
        - data: A torch tensor of float32 numbers to convert to fp4e2m1 microscaling format.
        - size: The size of the tensor to create.
        - device: The device on which to create the tensor.
        N%Parameter data must be a torch tensor.Either parameter data or size must be provided	device
isinstancetorchTensor_from_floatdatatuplesize
ValueErrorselfr   r   r    r   N/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/triton/tools/mxfp.py__init__   s    zMXFP4Tensor.__init__c                 C   sp   t jdd| jt j| jd}t jdd| jt j| jd}t jdd| jt j| jd}|d> |d> B |B t j| _| S )Nr      r   dtyper            )r	   randintr   uint8r   typer   )r   SEMr   r   r   random#   s
   zMXFP4Tensor.randomc                 C   s
  |t jks	J d| j}|d? d@ |}|d? d@ |}|d@ |}t |}|dk|dk@ }| }| rs|| }	|| }
|| }t d|	}t |
dk|
|
d }t |
dk|d d|d  }|t d| | }|||< |||dk@   d9  < |t jS )	z
        Convert fp4e2m1 data to float32.

        Returns:
        - A torch tensor of type dtype representing the fp4e2m1 data.
        zCCurrently only float32 is supported for fp4e2m1 to float conversionr   r   r         ?      ?r   )r	   float32r   r   
zeros_likeanypowwhere)r   r   r   r   r   r    valueis_zeronon_zero_maskS_nzE_nzM_nzsignexponentmantissavalue_nzr   r   r   to+   s&   
zMXFP4Tensor.toc                 C   sT  t |t j}t |}|dk}t |t |B }t jg dt j| jd}t jddgt j| jd}g }g }	g }
|D ]M}|dkrcd}|D ]}|d }|d|  }|	| |		| |
	| qFq<|
 d }|D ]}d|d  }|d|  }|	| |		| |
	| qkq<t j|t j| jd}t j|	t j| jd}	t j|
t j| jd}
|d}|jd }|d}| 
 }|||d< t ||d }t j|dd	d
\}}||k}| dkr|
d|d}|dkt j}||d  }t j|dd}|	| }|
| }||j}||j}d||< d||< |d> |d> B |B t jS )a5  
        Convert float32 numbers to mxf4 e2m1 format.
        * No encodings are reserved for Inf or NaN in mxf4.
        * Conversion from float supports roundTiesToEven rounding mode.
        * If a value exceeds the mxf4 representable range after rounding,
          clamps to the maximum mxf4 magnitude, preserving the sign.
        * If a value has magnitude less than the minimum subnormal magnitude
          in mxf4 after rounding, converts to zero.

        Parameters:
        - values: A torch tensor of float32 numbers to convert to fp4 format.
        r   )r   r   r   r   r   r   r   r#   r   r$   r"   T)dimkeepdimgư>r6   r   )r	   signbitr   r   absisnanisinftensorr   appenditemr%   viewshape	unsqueezemaxminsumexpandint32argmin)r   valuesr   
abs_valuesr+   
is_invalidE_bitsM_bitscandidate_valuescandidate_Ecandidate_Mr   r1   r    significandr*   
candidatesabs_values_flatNabs_values_expandedmax_candidate_valueerrors
min_errors_is_tieM_bits_expandedtie_breakerbest_indices
E_selected
M_selectedr   r   r   r   N   sd   







zMXFP4Tensor._from_floatc                 C   s   | j }d|  kr|jk sJ d J d||}|d d }|d dkrIdgd|j  }|j| d d d }d||< tjjj||ddd}t|j}|||< |	|d d |j
| }||d d}||d d}	|	d> |B }
|
S )a  
        Packs two e2m1 elements into a single uint8 along the specified dimension.

        Parameters:
        - dim: The dimension along which to pack the elements.

        Returns:
        - A torch tensor of dtype uint8 with two e2m1 elements packed into one uint8.
        r   zHThe dimension to pack along is not within the range of tensor dimensionsr   r   constant)moder*   r   )r   ndimr   r	   nn
functionalpadlistrA   insertreshapeselect)r   r6   r   size_along_dimnew_size_along_dim	pad_sizes	pad_index	new_shapelowhighpackedr   r   r   to_packed_tensor   s*   



zMXFP4Tensor.to_packed_tensorc                 C   s   |d? d@ }|d@ }t j||f|d d}t|j}|d| || d g ||d d  }|j| }	|| d dkrStdg|	j }
td|| |
|< |	t|
 }	|	t j	S )a  
        Unpacks a tensor where two fp4 elements are packed into a single uint8.

        Parameters:
        - packed_tensor: The packed tensor
        - dim: The dimension along which the tensor was packed.
        - original_shape: The shape of the original tensor before packing.

        Returns:
        - A tensor with the original data unpacked into uint8 elements containing one
          fp4e2m1 element in the least significant bits.
        r      r   r8   Nr   r   )
r	   stackrf   rA   rh   slicerb   r   r   r   )r   packed_tensorr6   original_shaperp   ro   stackedrA   rn   r   indicesr   r   r   unpack_packed_tensor   s   
*
z MXFP4Tensor.unpack_packed_tensorNNN)	__name__
__module____qualname__r   r!   r4   r   rr   rz   r   r   r   r   r      s    
#X#r   c                   @   s0   e Zd Zd
ddZdddZdd Zdd	 ZdS )MXScaleTensorNc                 C   r   )a6  
        Tensor class for working with microscaling E8M0 block scale factors.

        Parameters:
        - data: A torch tensor of float32 numbers to convert to fp8e8m0 microscaling format.
        - size: The size of the tensor to create.
        - device: The device on which to create the tensor.
        Nr   r   r   r   r   r   r   r      s   	 zMXScaleTensor.__init__c              
   C   s   d}|du rdnt dttt|| }|du rdntdt dttt|| }||ks7J dtj||d | jtj| j	d}|| _
| S )zp
        Generate random E8M0 data within a specified range.
        * Excludes the NaN encoding (255).
           Nr      z&Low must be less than or equal to highr   r   )rC   intr	   log2r=   rD   r   r   r   r   r   )r   ro   rp   biasmin_exponentmax_exponentr   r   r   r   r!      s   *0zMXScaleTensor.randomc                 C   s^   |t jks	J d| j|}|dk}| }d||< |d }t d|}t j||< ||S )NzBCurrently only float32 is supported for f8e8m0 to float conversion   r   r   g       @)r	   r%   r   r   cloner(   nan)r   r   r   is_nane_biaseder*   r   r   r   r4     s   

zMXScaleTensor.toc           	      C   s   t j|t j| jd}t |t |B |dkB }d||< ||  }t t |}|d }|t j	}t 
|dd}|t j|| < |S )aO  
        Convert float32 numbers to E8M0 format.
        * Values <= 0, NaNs, and Infs are converted to the NaN encoding (255).
        * Positive values are converted by computing the floor of log2(value) to get the exponent.

        Parameters:
        - values: A torch tensor of float32 numbers to convert to E8M0 format.
        r5   r   r   r   r   )r	   
empty_liker   r   r;   r<   floorr   r   rG   clamp)	r   rI   resultrK   valid_valuesr   r   e_biased_inte_biased_clampedr   r   r   r     s   	
zMXScaleTensor._from_floatr{   )NN)r|   r}   r~   r   r!   r4   r   r   r   r   r   r      s
    

r   )__doc__r	   r   r   r   r   r   r   <module>   s
     ^