o
    wiw3                     @   s   d dl mZmZ ddlmZ ddlmZmZmZm	Z	 ddl
mZ ddlmZ er.ddlmZ e r7d d	lmZ e r>d d
lZe	eZdd ZG dd deZd
S )    )TYPE_CHECKINGAny   )prepare_for_hqq_linear)is_accelerate_availableis_hqq_availableis_torch_availablelogging   )HfQuantizer)get_module_from_name)PreTrainedModel)remove_hook_from_moduleNc                 C   s.   | dd d }| }|D ]}|j| }q|S )N.)split_modules)modelnamemodule_treeparentm r   b/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/transformers/quantizers/quantizer_hqq.pyfind_parent%   s
   r   c                       s  e Zd ZdZdZdZdZdgZ fddZdd Z	d	d
de
e dede
e fddZd	d
de
e de
e de
e fddZd	d
dddedeeef def
ddZd	d
dddedddeeef de
e fddZdd  Z			
d*d!d"Zd*d#d$Zd+d&d'Zedefd(d)Z  ZS ),HqqHfQuantizerz
    HQQ quantizer base HF class.
    nn.Linear modules are first tagged with quant_config in _process_model_before_weight_loading().
    The actual quantization and offloading to the GPU is done in check_quantized_param().
    FThqqc                    s$   t  j|fi | d | _d| _d S )NF)super__init__torch_dtypeusing_multi_gpu)selfquantization_configkwargs	__class__r   r   r   9   s   
zHqqHfQuantizer.__init__c                 O   s   t  std|dds|ddrtdtj s td| jd u r8d|v r/|d | _n	tj	| _t
d |d	d }t|tr`d
| v sOd| v rStdtt| dk| _d S d S )NzA valid HQQ version (>=0.2.1) is not available. Please follow the instructions to install it: `https://github.com/mobiusml/hqq/`.from_tfF	from_flaxzwConverting weights from tf/flax weights is currently not supported, please make sure the weights are in PyTorch format.z/No GPU found. A GPU is needed for quantization.r   zUSetting torch_dtype to torch.float32 as the default value since it was not specified.
device_mapcpudiskzYou are attempting to use an HQQ model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.r
   )r   ImportErrorget
ValueErrortorchcudais_availableRuntimeErrorr   float32loggerinfo
isinstancedictvalueslensetr    )r!   argsr#   r(   r   r   r   validate_environment>   s.   



z#HqqHfQuantizer.validate_environmentr   r   missing_keysprefixreturnc                 K   s   | j r
dd |D S |S )Nc                 S   s   g | ]}d |vr|qS )weightr   ).0keyr   r   r   
<listcomp>b       z6HqqHfQuantizer.update_missing_keys.<locals>.<listcomp>)pre_quantized)r!   r   r<   r=   r#   r   r   r   update_missing_keys^   s   z"HqqHfQuantizer.update_missing_keysexpected_keysloaded_keysc                    sJ  | j s|S  fdd t|}t rddlm} | D ]\}}||_qt } || t }	|D ]|jjd D ]}
|
v rD|		 q9q1||	8 }|d d t
jddd d	h }t }|D ]tfd
d|D rp|	 q^||8 }|D ])d |v r|	d  n|fdd|D  d |v r|	d  qwt|S )Nc                    s:   |   D ]\}}t|tjjr||j  || qd S N)named_childrenr5   r.   nnLinearaddr   )r   layersr   module)_find_hqq_quantizable_layersr   r   rO   n   s
   zIHqqHfQuantizer.update_expected_keys.<locals>._find_hqq_quantizable_layersr   	HQQLinearskip_modulesr)   Flinear_layerquant_configcompute_dtypedevicedel_origbiasc                 3   s    | ]}| v V  qd S rH   r   )r@   _module)rA   r   r   	<genexpr>   s    z6HqqHfQuantizer.update_expected_keys.<locals>.<genexpr>z.weightc                    s   h | ]} d  | qS )r   r   )r@   _ref_key)rZ   r   r   	<setcomp>   rC   z6HqqHfQuantizer.update_expected_keys.<locals>.<setcomp>z.bias)rD   r9   r   hqq.core.quantizerQ   named_modulesr   configr"   rL   r.   float16state_dict_keysanyupdatelist)r!   r   rF   rG   new_keysrQ   r   rN   _valid_modules_skipped_modules_skip_module	_ref_keys_rm_keysr   )rO   rZ   rA   r   update_expected_keysg   sR   

	
z#HqqHfQuantizer.update_expected_keysparam_valueztorch.Tensor
param_name
state_dictc           	      K   sn   t  r	ddlm} t||\}}| jr#t|tjjst||o"|dkS t|tjjr.|dkp6t||o6|dkS )Nr   rP   r?   rY   )	r   r^   rQ   r   rD   r5   r.   rJ   rK   )	r!   r   rm   rn   ro   r#   rQ   rN   tensor_namer   r   r   check_quantized_param   s    z$HqqHfQuantizer.check_quantized_paramtarget_deviceztorch.deviceunexpected_keysc                 C   sr  t  rddlm} td|fdd}||_t||\}	}
d|ddd }t||}|dd }|
d	kr:dS i }|	 D ] \}}|d |v r`|||dd < |dur`||v r`|
| q@| jrt|	|rkdS |dd| j|d
d}|| |jdurt|jtjrtj|j|_| jr| |}t||| |	`~	tj  dS |D ]}t|	|tj||  q|jjd }|jjd }d|	jddd }d}d|v r|}n||v r|| }|D ]}||	jv rd} nq|dur$||	|| j|dd}|jdurt|jtjrtj|j|_| jr| |}t||| n|	j| j|d}	t|||	 tj  dS )a  
        Each nn.Linear layer is processed here.
        We first check if the corresponding module state_dict contains already HQQ quantized parameters.
        If not, we create a temp linear layer with the module state_dict params and use it for quantization
        r   rP   _selfc                 S   s   t jd| j| jdS )Nr   dtyperW   )r.   emptyrV   rW   )rt   r   r   r   r?      s   z5HqqHfQuantizer.create_quantized_param.<locals>.weightr   Nr   rY   FrS   rU   rR   weight_quant_paramsT)rU   rV   rW   rX   ru   )r   r^   rQ   propertyr?   r   joinr   r   itemsremoverD   r5   r   load_state_dictrY   r.   TensorrJ   	Parameterr    _patch_layer_for_multigpusetattr__dict__r/   empty_cacher`   r"   r   to)r!   r   rm   rn   rr   ro   rs   rQ   r?   rN   rp   
layer_nameparent_modulenodemodule_state_dictkv	hqq_layerrA   rU   rR   
module_tagmodule_quant_configskip_moduler   r   r   create_quantized_param   s   








z%HqqHfQuantizer.create_quantized_paramc                    s$   t dd   fdd_S )Nc                 S   s4   t || j|   }| jd ur|| j7 }|S rH   )r.   matmulr   rW   
dequantizetrY   )r!   xoutr   r   r   forward_with_device1  s   

zEHqqHfQuantizer._patch_layer_for_multigpu.<locals>.forward_with_devicec                    s
    | S rH   r   )r   r   r   r   r   <lambda>7  s   
 z:HqqHfQuantizer._patch_layer_for_multigpu.<locals>.<lambda>)r   forward)r!   r   r   r   r   r   .  s   z(HqqHfQuantizer._patch_layer_for_multigpuc                 K   s   t || jd}d S )N)r"   )r   r"   r!   r   r#   r   r   r   $_process_model_before_weight_loading:  s   z3HqqHfQuantizer._process_model_before_weight_loadingc                 K   s   d|_ |  |_|S NT)is_hqq_quantizedis_serializableis_hqq_serializabler   r   r   r   #_process_model_after_weight_loadingC  s   
z2HqqHfQuantizer._process_model_after_weight_loadingNc                 C      dS r   r   )r!   safe_serializationr   r   r   r   H  s   zHqqHfQuantizer.is_serializablec                 C   r   r   r   )r!   r   r   r   is_trainableK  s   zHqqHfQuantizer.is_trainable)r   r   rH   )__name__
__module____qualname____doc__use_keep_in_fp32_modules requires_parameters_quantizationrequires_calibrationrequired_packagesr   r;   re   strrE   rl   r6   r   boolrq   r   r   r   r   r   rz   r   __classcell__r   r   r$   r   r   -   st     
	
=



s

	
r   )typingr   r   integrationsr   utilsr   r   r   r	   baser   quantizers_utilsr   modeling_utilsr   accelerate.hooksr   r.   
get_loggerr   r3   r   r   r   r   r   r   <module>   s   
