o
    Tia                     @   s   d dl Z d dl mZ d dl mZ d dlmZ ddlmZmZm	Z	 d dl
mZmZmZ d dlmZ i Zd	Zd
ejdedejfddZded
ejdefddZG dd dejZG dd dejZejeejeiZdS )    N)nn)Tensor)
functional   )	QuantizerDeQuantizerconcat_to_compat_param)TupleCallableDict)register_external_parameterFpre_quant_weightquantize_weight_fnreturnc                 C   s^   t |tv rtt | }trt| | tt | S | \}}}t|||tt |< tt | S N)idquantized_weight_registryis_zero3_enabledr   r   )modelr   r   compat_tensorquantized_weightsquant_scale	quant_min r   [/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/inference/quantization/layers.pyget_quantized_weight_wrapper   s   
r   	quantizerc                    s$   dt tjttf f fdd}|S )Nr   c                     s@     j\} }}|  j} | j}| j}| ||fS r   )quantizedataviewdtypetype)r   r   r   r   r   r   r   func$   s
   
z$get_quantize_weight_fn.<locals>.func)r	   r   	Parameterr   )r   r   r#   r   r"   r   get_quantize_weight_fn"   s    r%   c                       >   e Zd Zdedejddf fddZdedefdd	Z  Z	S )
QuantizedLinearconfigpre_quant_layerr   Nc                    sv   t t| j|j|j|jd u|jj|jjd || _	t
|d| _|j| _t| |jt| j|j| _t||jj| j_d S )N)in_featuresout_featuresbiasdevicer    r(   )superr'   __init__r*   r+   r,   weightr-   r    r(   r   r   r   r%   r   dequantizer)selfr(   r)   	__class__r   r   r0   1   s   zQuantizedLinear.__init__inputc                 C   sB   | j | j \}}}| j j|tj||}tjj	||| j
S r   )r1   deconcatr2   
dequantizer   torchuint8_C_nnlinearr,   r3   r6   quantized_weightr   r   temp_dequantized_weightr   r   r   forward@   s
   zQuantizedLinear.forward)
__name__
__module____qualname__r   r   Linearr0   r   rA   __classcell__r   r   r4   r   r'   /   s    r'   c                       r&   )
QuantizedEmbeddingr(   r)   r   Nc                    s   t t| j|j|j|j|j|j|j|j	|j
|j
j|j
jd
 |jd u s'J d|jdks0J d|jdks9J d|j	dksBJ d|| _t|d}t| |j
t||j
| _
t||j
j| j
_d S )N)
num_embeddingsembedding_dimpadding_idxmax_norm	norm_typescale_grad_by_freqsparse_weightr-   r    zNot supported   Fr.   )r/   rG   r0   rH   rI   rJ   rK   rL   rM   rN   r1   r-   r    r(   r   r   r%   r   r2   )r3   r(   r)   r   r4   r   r   r0   M   s*   

zQuantizedEmbedding.__init__r6   c              	   C   sN   | j | j \}}}| j j|tj||}t||| j	| j
| j| j| jS r   )r1   r7   r2   r8   r   r9   r:   F	embeddingrJ   rK   rL   rM   rN   r>   r   r   r   rA   f   s   zQuantizedEmbedding.forward)
rB   rC   rD   r   r   	Embeddingr0   r   rA   rF   r   r   r4   r   rG   K   s    rG   )r9   r   r   torch.nnr   rQ   utilsr   r   r   typingr	   r
   r   deepspeed.runtime.zeror   r   r   r$   r   r%   rE   r'   rS   rG   QUANTIZATION_LAYER_MAPPINGSr   r   r   r   <module>   s    %