o
    Ti                     @   s   d dl Z d dlZd dlmZ d dlm  mZ d dlmZ d dl	m
Z
 d dlmZmZ ddlmZ G dd dejZG d	d
 d
ejZdS )    N)Optional)get_accelerator)	QuantizerFP_Quantize   )QuantizationConfigc                	       s   e Zd ZdZ				ddeej dedede	fdd	Z
d
ejfddZdejfddZdddZdd Zdd Zdd Zdd Zd ddZ fddZ  ZS )!QuantizedParametera  
    Quantized parameter class that implements weight quantization. Weights
    are stored in quantized form on GPUs, and can be dequantized on-the-fly when
    needed by the model. The weights are actually quantized during any `.to(device)`.

    Arguments:
        data (Tensor): parameter tensor.
        requires_grad (bool, optional): if the parameter requires gradient. Defaults
            to False and is not supported to be True. Argument provided only for interface
            compatibility with torch.nn.Parameter.
        quantization_config (QuantizationConfig, optional):
        quantizer (Quantizer, optional): Defaults to FP_Quantize but can be any quantizer
            that implements deepspeed.ops.fp_quantizer.Quantizer. This argument is also
            required since the quantizer is stashed in the Parameter itself, some models
            may clone the Parameter by passing an attribute __dict__. For an example, see
            tests/unit/linear/test_quant_param.py::TestQuantParam::test_hf_clone
    NFdatarequires_gradquantization_config	quantizerc                 C   sn   |rt d|d u rtd}tj| ||}|d u rt n||_|d ur)||_nt|jd|_|	| |S )Nz;requires_grad=True is not supported with QuantizedParameterr   r   )

ValueErrortorchemptyTensor_make_subclassr   r   r   r   _ensure_quantized)clsr	   r
   r   r   self r   Q/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/linear/quantization.py__new__%   s   

zQuantizedParameter.__new__tensorc                 C   s   t  |r@|j| jjkrBt  t  |j | jj	|j
| jj| jjd|_
W d    n1 s2w   Y  |j| jjksDJ d S d S d S )Nq_bitsq_mantisa_bits)r   on_acceleratordtyper   q_dtypestreamcurrent_streamdevicer   quantizer	   r   mantissa_bits)r   r   r   r   r   r   :   s   

z$QuantizedParameter._ensure_quantizedreturnc                 C   sx   t  | jr9| jj| jjkr9t  t  | jj | j	j
| j| jj| jjdW  d   S 1 s4w   Y  | jS )zW
        Return a tensor containing the dequantized weights of this parameter.
        r   N)r   r   r	   r   r   r   r    r!   r"   r   
dequantizer   r$   )r   r   r   r   dequantizedC   s   
 zQuantizedParameter.dequantizedc                 C   s:   t | ddr|r| t  | _d S | d| _d S d S )N
ds_offloadFcpu)getattrtor   current_device_namer	   )r   revertr   r   r   offloadN   s
   zQuantizedParameter.offloadc                 C   s(   | j }| j|d< | j|d< | j|d< |S )Nr	   r   r
   )__dict__r	   r   r
   r   stater   r   r   __getstate__U   s
   


zQuantizedParameter.__getstate__c                 C   s,   |d | _ |d | _|d | _|d | _d S )Nr   r   r	   r
   )r   r   r	   r
   r0   r   r   r   __setstate__\   s   


zQuantizedParameter.__setstate__c                 C   sX   t | t | }|  }|| t|d |_t|d |_t|d |_|S )Nr   r   r	   )	typer   r2   r3   copydeepcopyr   r   r	   )r   memonew_instancer1   r   r   r   __deepcopy__b   s   
zQuantizedParameter.__deepcopy__c                 C   s(   t | t | }|  }|| |S N)r4   r   r2   r3   )r   r8   r1   r   r   r   __copy__k   s   
zQuantizedParameter.__copy__c                 C   s.   |d u rdn|}| j j||d | j||dS )Ncuda)non_blocking)r   r+   )r   r"   r=   r   r   r   r<   q   s   zQuantizedParameter.cudac                    s2   t  j|i |}| jj|i | | | |S )zt
        Move the parameter to the given device. Then, if the device is a cuda device,
        quantize it.
        )superr+   r   r   )r   argskwargsr   	__class__r   r   r+   v   s   
zQuantizedParameter.to)NFNN)F)NF)__name__
__module____qualname____doc__r   r   r   boolr   r   r   r   r'   r.   r2   r3   r9   r;   r<   r+   __classcell__r   r   rA   r   r      s0    
	
	
r   c                	       sR   e Zd ZdZddejfdedededef fdd	Z	d
ej
dej
fddZ  ZS )QuantizedLinearz
    Linear layer that implements weight quantization. Parameters
    are stored via `QuantizedParameter` and are dequantized on-the-fly during any
    forward pass.
    FN	input_dim
output_dimbiasr   c                    s<   t  j||||d |tjksJ dt| jj|d| _d S )N)rL   r   z&currently only supports bfloat16 dtyper   )r>   __init__r   bfloat16r   weightr	   )r   rJ   rK   rL   r   r   rA   r   r   rM      s   zQuantizedLinear.__init__inputr%   c                 C   s   t || j | jS r:   )FlinearrO   r'   rL   )r   rP   r   r   r   forward   s   zQuantizedLinear.forward)rC   rD   rE   rF   r   rN   intrG   r   rM   r   rS   rH   r   r   rA   r   rI      s    	
rI   )r5   r   torch.nnnntorch.nn.functional
functionalrQ   typingr   deepspeed.acceleratorr   deepspeed.ops.fp_quantizerr   r   configr   	Parameterr   LinearrI   r   r   r   r   <module>   s   o