o
    Ti,                     @   s   d dl Z d dl mZ d dlmZ d dlZd dlmZ ddlmZ ddlm	Z	m
Z
 d dlmZ d d	lmZ d d
lmZ ddlmZ d dlZdejdedejfddZdS )    N)nn)Dict)layers   )QUANTIZATION_LAYER_MAPPINGS)$get_AsyncPartitionedParameterSwapperrecursive_setattr)logger)deque)ContextManagers)QuantizationContextmodel	ds_configreturnc                    sF  i }d}d|v sJ d|d d }t | }d|v o'd|d v o'|d d dk d|v o1d|d v } t_ r@tt||d	gnt }| ttd
d | 	 }|s^|j
 fddd t|}t|dkr| \}	}
d}d}| D ]\}}||	v r|du sJ |	 d| d| |}|}qv|du rqb r|
j  |
jjtjksJ dtt|
 ||
} r|
j  t| |	| ||vrg ||< || |	 |d7 }t  t|dkshW d   n1 sw   Y  tj  td| d d}| D ]\}}|d| d7 }|D ]}	|d|	 d7 }qq t| | S )a@  [Experimental] Apply group-wise weight quantization to model. Replace layers module according to config_list

    Args:
        model (nn.Module): A nn.Module
        ds_config (Dict, optional): The ds_config dictionary. use None for non-deepspeed managed model.

    Returns:
        nn.Module: Quantized nn.Module
    r   weight_quantizationz/Please provide quantization config in ds_configpost_init_quantzero_optimizationstage   offload_param)config_dict_or_pathparam_swapperc                 S   s   t | d tv S Nr   )typer   named_module r   a/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/inference/quantization/quantization.py<lambda>6   s    z6_init_group_wise_weight_quantization.<locals>.<lambda>c                    s"    r
| d j j S | d j  S r   )weight	ds_tensornumelr   is_zero3_enabledr   r   r   :   s   )keyNz( matched multiple quantization key word z and z!Model weight is expected in half.r   z0Group-wise weight quantization summary: convert z$ node(s) to quantized implementation
zKey: z, matched modules:
	)r   r   r#   r   r   
contextlibsuppresslistfilternamed_modulessortr
   lenpopleftitemsr   
all_gatherdtypetorchfloat16r   r   	partitionr   appendgccollectquantized_weight_registryclearr	   info)r   r   matched_module_list_by_keymatched_module_countquantization_confignvme_swapperis_offloading_enabledcontext_mgrmodule_listmodule_namemodulematched_keymatched_quantization_configr$   config
new_modulesummary_strr   r"   r   $_init_group_wise_weight_quantization   s   

 


/

rI   )r2   r   typingr   r6    deepspeed.inference.quantizationr   r   utilsr   r   deepspeed.utils.loggingr	   collectionsr
   transformers.utils.genericr   quantization_contextr   r'   ModulerI   r   r   r   r   <module>   s   