o
    ãÊi?  ã                   @   sŒ   d dl Z d dl mZ d dlmZ ddd„Zd	ed
efdd„Zd	edefdd„Zd	edefdd„Zdededefdd„Z	dedefdd„Z
dS )é    N)ÚTensor)ÚDTensorTé   é   c           
      C   s~  g }|| rdnd }d||  d }t |ƒD ]R}t| r%d|| |  d nd|| | d  d ƒ}t dd|¡}|dd… |dd…  d }	|d|d  |  |	  ¡ 7 }| rh|d|d  |   |	  ¡ 7 }q|dkr¥t dd|d ¡}|dd… |dd…  d }	|d|d  |  |	  ¡ 7 }| r¥|d|d  |   |	  ¡ 7 }| d¡ | d	¡ t|ƒd| ks¹J ‚| ¡  |S )
a+  
    Creates the dynamic quantiztion map.

    The dynamic data type is made up of a dynamic exponent and
    fraction. As the exponent increase from 0 to -7 the number
    of bits available for the fraction shrinks.

    This is a generalization of the dynamic type where a certain
    number of the bits and be reserved for the linear quantization
    region (the fraction). n determines the maximum number of
    exponent bits.

    For more details see
    (8-Bit Approximations for Parallelism in Deep Learning)[https://arxiv.org/abs/1511.04561]
    é   é   gš™™™™™¹?Néÿÿÿÿg       @é
   r   g      ð?)ÚrangeÚintÚtorchÚlinspaceÚtolistÚappendÚlenÚsort)
ÚsignedÚmax_exponent_bitsÚ
total_bitsÚdataÚnon_sign_bitsÚadditional_itemsÚiÚfraction_itemsÚ
boundariesÚmeans© r   úM/home/ubuntu/.local/lib/python3.10/site-packages/torchao/optim/quant_utils.pyÚcreate_dynamic_map   s4   ÿý € 

r   ÚinputÚ
block_sizec                 C   sD   | j }|  d|¡} |  ¡  d¡ d¡}| | dd¡ } |  |¡|fS )z(Scale tensor so that max(abs(input)) = 1r   gê-™—q=r   )ÚshapeÚviewÚabsÚamaxÚclip)r   r    r!   Úscaler   r   r   Úscale_tensor@   s
   r'   Úqmapc                 C   s6  t  | |d kdd¡}|t  | ||d  kdd¡7 }|t  | ||d  kdd¡7 }|t  | ||d  kdd¡7 }|t  | ||d  kdd¡7 }|t  | ||d  kdd¡7 }|t  | ||d  kdd¡7 }|t  | ||d	  kd	d¡7 }|d	 jd
d}|| }|| }| | }t  ||| d k||¡}| t j¡S )Né€   r   é@   é    é   r   é   r   r   éÿ   ©Úmaxç      à?©r   Úwherer%   ÚtoÚuint8©r   r(   ÚcodesÚcodes_upÚval_downÚval_upÚresidualr   r   r   Úquantize_8bit_with_qmapK   s   r<   c                 C   s¾   t  | |d kdd¡}|t  | ||d  kdd¡7 }|t  | ||d  kdd¡7 }|t  | ||d  kdd¡7 }|d jdd}|| }|| }| | }t  ||| d k||¡}| t j¡S )	Nr   r   r-   r   r   é   r/   r1   r2   r6   r   r   r   Úquantize_4bit_with_qmapa   s   r>   r7   r&   c                 C   s2   ||   ¡   |jd d¡| dd¡ }| | j¡S )Nr   r   r   )r   r"   r!   )r7   r(   r&   Úoutr   r   r   Údequant_with_qmaps   s   &r@   Ú_x_f32Úreturnc              	   C   s¦   t | tƒ}|r|  ¡ n| }tjdd|j|jtjd}| tj¡}|d@ }|d@ }t 	||k |d |¡}| tj
¡ ¡ }|rQtj|| j| jdt| jƒt|  ¡ ƒdS |S )Nr   i   )ÚdeviceÚdtypeiÿÿ  l     þ F)Ú	run_checkr!   Ústride)Ú
isinstancer   Úto_localr   Úrandintr!   rC   Úint32r"   r3   Úfloat32Úbfloat16Ú
from_localÚdevice_meshÚ
placementsÚtuplerF   )rA   Úis_dtÚx_f32Ú
rand_16bitÚ
x_f32_bitsÚ
x_fractionÚx_bf16_towards_zeroÚx_bf16_truncr   r   r   Ú_fp32_to_bf16_sry   s8   

ÿÿüø
úÿ
örX   )Tr   r   )r   r   Útorch.distributed.tensorr   r   r   r'   r<   r>   r@   rX   r   r   r   r   Ú<module>   s   
3