o
    TiQ                     @   sd   d dl Z d dlZd dlmZ d dlZd dlmZ d dlmZ daG dd deZ	G dd de	Z
dS )	    N)ABC)FPQuantizerBuilder)get_acceleratorc                   @   sT   e Zd ZdZddddZej						ddejfd
dZ	ejddejfddZ
dS )	Quantizerz
    Abstract Quantizer class that implements quantize/dequantize methods.

    Arguments:
        group_size (int, optional): number of values or elements that are grouped
            together for the quantization process.
       returnNc                 C   s
   || _ d S N
group_size)selfr
    r   W/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/ops/fp_quantizer/quantize.py__init__   s   
zQuantizer.__init__      Fc                 C      d S r   r   )r   inputq_bitsq_mantisa_bitsstochastic_modereturn_meta_tensorr   r   r   quantize   s   zQuantizer.quantizec                 C   r   r   r   r   input_qfp_outr   r   scaler   r   r   
dequantize&   s   zQuantizer.dequantize)r   r   Nr   r   FFNr   r   N)__name__
__module____qualname____doc__r   abcabstractmethodtorchTensorr   r   r   r   r   r   r      s    r   c                       st   e Zd Zd fddZ				ddejfdd	Zd
d Zdd ZddejfddZ					ddejfddZ
  ZS )FP_Quantizer   Nc                    sR   t  j|jd td u rt  attdd| _|| _d | _	d | _
d | _d | _d S )Nr	   	CUDA_IMPLT)superr   r
   fp_quant_moduler   loadgetattr	cuda_implq_config
orig_dtype
num_groupsr   r   )r   quantization_config	__class__r   r   r   -   s   

zFP_Quantize.__init__r   r   Fc                 C   s,  |j tjks
J d|r|dksJ d|j | _|j| _|dkr!n|dkr(d}n|dkr/d}n|dkr6d}nJ d
| d| | j | _tj	| jt
| j| d d tj|jd| _t| j|| j|||}|r|j| jdd\}| _| |j}| j | _| `~t  t   || jfS |S )Nzonly support bf16 for nowr   z*meta tensor is only supported with q_bit=8               r   Missing zZ-quantization, please add the template arguments for the kernel to support this precision!dtypedevicedim)r<   r&   bfloat16r0   shape
orig_shapenumelr
   r1   onesintuint8r=   r   r+   r   splitr   
contiguousreshapegccollectr   empty_cache)r   r   r   r   r   r   outdatar   r   r   r   :   sB   


zFP_Quantize.quantizec                 O   sD   t | dr| jj|i || _t | dr | jj|i || _d S d S )Nr   r   )hasattrr   tor   )r   argskwargsr   r   r   rQ   d   s
   

zFP_Quantize.toc                 C   s   t | j| jS r   )r+   
get_scalesr   r1   )r   r   r   r   rT   k   s   zFP_Quantize.get_scalesc                 C   s   | j d us	J d|d u rtj| j| j |jdn|}|dkrn|dkr&d}n|dkr-d}n|dkr4d}nJ d
| d|d ur\| | ksLJ dtj|d| j|gdd	 }t
||| j||| d  |S )NG[De-quantization Error]: you need to call quantize before dequantizing!r;   r   r5   r6   r7   r8   r9   r   r:   \-dequantization, please add the template arguments for the kernel to support this precision!l[De-quantization Error]: quantized data should have the same size as original tensor when scale is not None!r>   r?   )r0   r&   emptyrC   r=   rD   catrJ   r
   rI   r+   r   r   r   r   r   r   n   s6   
 zFP_Quantize.dequantizec              	   C   s  t | drt| jdksJ d| jd usJ d|d u r3tj|jd g| jdd  R | j|jdn|}|dkr:n|d	krAd
}n|dkrHd}n|d
krOd}nJ d| d|d urw| | ksgJ dtj	|
d| j|gdd }t|||| j||| d  |S )NrC   r   zoSelective-Dequantization works on 3d tensor only! Please reshape the tensor before calling dequantize function.rU   r   r9   r;   r   r5   r6   r7   r8   r:   rV   rW   r>   r?   )rP   lenrC   r0   r&   rX   rB   r=   rD   rY   rJ   r
   rI   r+   selective_dequantize)r   r   indexesr   r   r   r   r   r   r   r[      sF   

 
z FP_Quantize.selective_dequantizer   r   r   )r    r!   r"   r   r&   r'   r   rQ   rT   r   r[   __classcell__r   r   r3   r   r(   +   s&    
*r(   )r&   r$   r   rK   deepspeed.ops.op_builderr   deepspeed.acceleratorr   r+   r   r(   r   r   r   r   <module>   s   