o
    Ti.                     @   s8  d dl Z d dlZd dl mZ d dlmZ d dlmZ d dlmZmZm	Z	 d dl
mZ d dlZe  r8e  ndZdadd Zd	ed
efddZd	ed
efddZG dd dZG dd dZdejfddZdd Z	d'dedededed
e	ejef f
ddZdejd efd!d"Zd#d$ Zd%d& Zejej fZ!dS )(    N)Tensor)Tuple)DictCallableUnion)get_acceleratorcpuc                   C   s   t d u rtjj  a t S )N)quantizer_module	deepspeedops
op_builderQuantizerBuilderload r   r   Z/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/inference/quantization/utils.pyget_quantizer_module   s   r   tensorreturnc                 C   sD   | j jdkr| jtjkr| jtjd||jtjdS | ||S Nr   )dtype)devicetyper   torchfloat16tofloat32clamp_)r   minmaxr   r   r   tensor_clamp   s    r   c                 C   s<   | j jdkr| jtjkr| jtjd jtjdS |  S r   )r   r   r   r   r   r   r   round_)r   r   r   r   tensor_round#   s   r!   c                   @   sh   e Zd ZdeddfddZdedeeeef fddZdedeeeef fd	d
ZdedefddZ	dS )	Quantizerconfigr   Nc                 C   sD   || _ | j d dks| j d dksJ d| j d dks J dd S Nnum_bits      z-Only INT4 and INT8 quantization is supported.	symmetricFz9Only asymmetric quantization is supported at this moment.)r#   )selfr#   r   r   r   __init__-   s   zQuantizer.__init__r   c                 C   s  |j | jd  | jd  dksJ d|j  d| j t|}|j }|| jd  | jd  }|d | jd  || jd f || jd d d   }||}| |\}}}||}| jd dkrk| |||fS | jd d	krw|||fS J d| jd )N	group_dim
group_sizer   zTensor shape: z quantization config    r%   r&   r'   F Unsupported quantization bits {})shaper#   r   cloneview_quantize_int8_compress_uint8_to_uint4format)r)   r   r/   
num_groups	new_shapequantized_tensorscale	min_valuer   r   r   quantize3   s"    



zQuantizer.quantizec                 C   s   d| j d  d }|j| j d d dd}|j| j d d dd}|||  }|||}tt|d|tj	}|||fS )N   r%   r-   r+   T)dimkeepdimr   )
r#   aminamaxsub_mul_r!   r   r   r   uint8)r)   r   q_ranger9   	max_valuer8   r   r   r   r2   I   s   
zQuantizer._quantize_int8c                 C   sv   |j d d dksJ t|j }|d d |d< tj|tj|jd}t|ddd df d|ddd df }|S )Nr;   r   r   r   .r&   r-   )r/   listr   emptyrB   r   
bitwise_orbitwise_left_shiftr)   r   new_data_shapedatar   r   r   r3   T   s   
.z"Quantizer._compress_uint8_to_uint4)
__name__
__module____qualname__r   r*   r   r   r:   r2   r3   r   r   r   r   r"   +   s
    r"   c                   @   sj   e Zd ZdedejddfddZdeded	edefd
dZdeded	edefddZ	dedefddZ
dS )DeQuantizerr#   r   r   Nc                 C   sJ   || _ || _| j d dks| j d dksJ d| j d dks#J dd S r$   )r#   r   )r)   r#   r   r   r   r   r*   b   s   zDeQuantizer.__init__r   quant_scale	quant_minc           
      C   s  | j d d dkr| j d dks| j d dkr| j d t|jd kr| jtjkrtt  kr| j d }| j d dkrc|d }t	 
|d	|||| | | j d }t|j}|d	 d |d	< n!| j d dkrt	 |d	|||| | | j d }t|j}||S | j d dkr| |}n| j d dkrJ d| j d |j}|| j d  | j d  }|d | j d  || j d f || j d d d   }||}| ||||}	|	S )Nr,   r'   r   r%   r&   r+   r-   r;   rE   Fr.   )r#   lenr/   r   r   r   r   r   device_namer   $dequantize_int4_to_half_experimentalreshapenumelrG   $dequantize_int8_to_half_experimental_decompress_uint4_to_uint8r4   r1   _dequantize_int8)
r)   r   rR   rS   last_dimension_sizer7   r/   r5   r6   dequantized_tensorr   r   r   
dequantizei   s@   




zDeQuantizer.dequantizec                 C   sB   |j tjksJ tj|| j |jd}||}|||}|S )NrF   )r   r   rB   
zeros_liker   copy_div_add_)r)   r   rR   rS   rM   r   r   r   r[      s
   
zDeQuantizer._dequantize_int8c                 C   sb   t |j}|d d |d< tj|tj|jd}|d|ddd df< |d|ddd df< |S )	NrE   r;   rF   r&   .r      r-   )rG   r/   r   rH   rB   r   bitwise_right_shiftbitwise_andrK   r   r   r   rZ      s   
z&DeQuantizer._decompress_uint4_to_uint8)rN   rO   rP   r   r   r   r*   r   r^   r[   rZ   r   r   r   r   rQ   `   s
    &rQ   modelc                 C   s4   |   D ]\}}t|dr|jd ur|j  S qd S )Nnvme_swapper)named_parametershasattrrg   )rf   
param_nameparamr   r   r   $get_AsyncPartitionedParameterSwapper   s
   
rl   c                 C   s>   | d}| }|dd D ]}t||}q||d | dS )a:  
    Recursively set the attribute of a module.
    Args:
        model (`torch.nn.Module`)
            The model to set the attribute in.
        module_name (`str`)
            The name of the module to set the attribute in.
        module (`torch.nn.Module`)
            The module to set the attribute to.
    .NrE   )splitgetattr__setattr__)rf   module_namemodule
split_listoutputnamer   r   r   recursive_setattr   s
   
rv   Tquantized_weightrR   rS   return_paramc           	      C   s   | j }|j }|j }t| } t|}t|}dtjdtjdtjdtfdd}t| ||g}|r9tj|dd}|||||_|S )	Nshape_wieghtshape_scale	shape_minr   c                    s*   dt jdttttf f fdd}|S )Ncompat_tensorr   c                    sh   t | dd }t | d  }t | d      }|||fS )Nr   )r   narrowrX   r1   )r|   weightr8   min_valr{   rz   ry   r   r   fn   s   
zGconcat_to_compat_param.<locals>.deconcat_individual_tensors.<locals>.fn)nn	Parameterr   r   )ry   rz   r{   r   r   r   r   deconcat_individual_tensors   s   &z;concat_to_compat_param.<locals>.deconcat_individual_tensorsF)requires_grad)	r/   r   flattenSizer   concatr   r   deconcat)	rw   rR   rS   rx   ry   rz   r{   r   r|   r   r   r   concat_to_compat_param   s    



r   rk   quant_configc                 C   s   t | dr	J dt|}t|| j}|| j\}}}|| j}|| j}|| j}t|||}|| _|j| _|| _	|| _
t| dd d S )Nweight_quantizedz%Parameter has already been quantized.T)ri   r"   rQ   r   r:   rM   r1   r   r   	quantizerdequantizersetattr)rk   r   r   r   rw   rR   rS   quantized_compat_tensorr   r   r   _quantize_param   s   r   c                    s*   t  dtdtjdtf fdd}|S )Ninputr~   r   c                    sl   t |dr*t|dr*||\}}}|j|tj||} | |g|R i |S  | |g|R i |S )Nr   )ri   ro   r   r   r^   r1   r   rB   )r   r~   argskwargsrw   rR   rS   temp_dequantized_weightfr   r   wrapper   s   z*wrap_quantized_functional.<locals>.wrapper)	functoolswrapsr   r   r   r   r   r   r   r   wrap_quantized_functional   s   	r   c                    s   t   fdd}|S )Nc              	      s   d }d }	t | jdrRt| jdrRt | jdsRt| jdd |d }	|	|v rR| jj||	 \}
}}|
| jj}
|| jj}|| jj}||	 }t|
||||	<  | ||||||| |d urm|||	< t	| jd d S d S )Nr   state_dict_quantizedTr~   )
ri   r~   ro   r   r   r:   r1   r   r   delattr)rf   
state_dictprefixlocal_metadatastrictmissing_keysunexpected_keys
error_msgsreplaced_old_valuekeyrw   rR   rS   r   r   r   r      s*   
z*wrap_load_from_state_dict.<locals>.wrapper)r   r   r   r   r   r   wrap_load_from_state_dict   s   r   )T)"r   r
   r   typingr   torch.nnr   r   r   r   deepspeed.acceleratorr   r   is_availablerU   r   r	   r   r   r!   r"   rQ   Modulerl   rv   boolr   r   r   r   r   Linear	EmbeddingWEIGHT_QUANTIZATION_LAYERSr   r   r   r   <module>   sB   5A
! 