o
    Ti                     @   sf   d dl Z zd dlmZ d dlmZ W n ey!   d dlmZ Y nw G dd deZG dd dZdS )    N)__deepspeed__)	OpBuilderc                       sZ   e Zd ZdZdZd fdd	Zdd Zdd	 ZdddZe	dd Z
e	dddZ  ZS )FPQuantizerBuilderDS_BUILD_FP_QUANTIZERfp_quantizerNc                    s$   |d u r| j n|}t j|d d S )N)name)NAMEsuper__init__)selfr   	__class__ ]/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/ops/op_builder/hpu/fp_quantizer.pyr
      s   zFPQuantizerBuilder.__init__c                 C   s   d| j  dS )Nzdeepspeed.ops.fp_quantizer._op)r   r   r   r   r   absolute_name   s   z FPQuantizerBuilder.absolute_namec                 C   s   g S Nr   r   r   r   r   sources      zFPQuantizerBuilder.sourcesTc                 C   s   t S r   )FPQuantizer)r   verboser   r   r   load   r   zFPQuantizerBuilder.loadc                   C   s   t jS r   )torchfloat8_e4m3fnr   r   r   r   get_default_quant_dtype"   s   z*FPQuantizerBuilder.get_default_quant_dtypec                 C   sB   dd l m  m  m} | |jjkrtj}ntj}t	|j
S )Nr   )*habana_frameworks.torch.utils.experimentalr   utilsexperimental_get_device_typesynDeviceTypesynDeviceGaudi2float8_e4m3fnuzr   finfomax)q_bitshtexpdtyper   r   r   get_quant_range&   s
   z"FPQuantizerBuilder.get_quant_ranger   )T)__name__
__module____qualname__	BUILD_VARr   r
   r   r   r   staticmethodr   r(   __classcell__r   r   r   r   r      s    

r   c                   @   s@   e Zd ZdZedd Zedd Zedd Zedd	 Zd
S )r   Fc                 C   s   J d)NFz/Selective dequantize isn't implemented for HPU!r   )clsval_qscalesindexes
group_sizeq_mantisa_bitsq_exponent_bitsr   r   r   selective_dequantize3   s   z FPQuantizer.selective_dequantizec           
      C   s6   |j }|j}tjj|d| ||}	||	 |S )Ng      ?)shaper'   r   opshpucast_from_fp8viewcopy_)
r/   fp_outinput_qscaler3   r4   r5   
orig_shape
orig_dtypedequant_outr   r   r   
dequantize7   s
   
zFPQuantizer.dequantizec                 C   s   |dksJ d|dksJ d|j jsJ d|j\}}||| }	|	 jdddd	 }
t|j j}||
 }|	| tj
jj|	|||j d
\}}|	| |S )N   z1Quantize on HPU only supports quantization to FP8   z1Quantize on HPU only supports q_mantissa_bits = 3z+Quantization on HPU is only to float dtypes   T)dimkeepdimr   )r'   )r'   is_floating_pointr7   r;   floatabsr$   r   r#   r<   r8   r9   cast_to_fp8_v2)r/   outvalr?   r3   stochastic_roundingr%   r4   
num_groupsval_reshapedmax_valsq_range	tmp_scalequant_r   r   r   quantize?   s   


zFPQuantizer.quantizec                 C   s   |S r   r   )r/   rM   rP   r   r   r   
get_scalesT   s   zFPQuantizer.get_scalesN)	r)   r*   r+   	CUDA_IMPLclassmethodr6   rC   rW   rX   r   r   r   r   r   0   s    


r   )	r   
op_builderr   op_builder.builderr   ImportError deepspeed.ops.op_builder.builderr   r   r   r   r   r   <module>   s   