o
    i,                     @   s   d dl mZ d dlmZ ddlmZ ddlmZmZm	Z	 ddl
mZ ddlmZ er0dd	lmZ e r7d d
lZe rId dlmZ edd Zee_e	eZG dd deZd
S )    )defaultdict)TYPE_CHECKING   )prepare_for_hqq_linear)is_hqq_availableis_torch_availablelogging   )HfQuantizer)get_module_from_name)PreTrainedModelN)	HQQLinearc                 C   s   t jd| j| jdS )Nr   )dtypedevice)torchemptycompute_dtyper   self r   ^/home/ubuntu/LTX-2/.venv/lib/python3.10/site-packages/transformers/quantizers/quantizer_hqq.pyweight%   s   r   c                	       s   e Zd ZdZdZdZdZdgZ fddZddd	e	e
 d
e
de	e
 fddZddde	e
 de	e
 de	e
 fddZddde
defddZddddde
ddfddZdd Z		d&ddZd&dd Zd'd"d#Zedefd$d%Z  ZS )(HqqHfQuantizerz
    HQQ quantizer base HF class.
    nn.Linear modules are first tagged with quant_config in _process_model_before_weight_loading().
    FThqqc                    s   t  stdt j|fi | d | _d| _td d  dh | _|	dds.|	ddr2t
d| jd u rJd|v rA|d | _n	tj| _td |	d	}t|trqd
| v s`d| v rdt
dtt| dk| _d S d S )NzA valid HQQ version (>=0.2.1) is not available. Please follow the instructions to install it: `https://github.com/mobiusml/hqq/`.Fbiasfrom_tf	from_flaxzwConverting weights from tf/flax weights is currently not supported, please make sure the weights are in PyTorch format.r   zOSetting dtype to torch.float32 as the default value since it was not specified.
device_mapcpudiskzYou are attempting to use an HQQ model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.r	   )r   ImportErrorsuper__init__r   using_multi_gpur   state_dict_keyshqq_keysget
ValueErrorr   float32loggerinfo
isinstancedictvalueslenset)r   quantization_configkwargsr   	__class__r   r   r"   9   s2   



zHqqHfQuantizer.__init__modelr   missing_keysprefixreturnc                 K   s   | j r
dd |D S |S )Nc                 S   s   g | ]}d |vr|qS )r   r   ).0keyr   r   r   
<listcomp>_       z6HqqHfQuantizer.update_missing_keys.<locals>.<listcomp>)pre_quantized)r   r4   r5   r6   r1   r   r   r   update_missing_keys[   s   z"HqqHfQuantizer.update_missing_keysexpected_keysloaded_keysc                    s8  | j s|S  fdd t|}| D ]\}}||_qt } || t }|D ]|jjd D ]}	|	v r;| q0q(||8 }td d tj	ddd
 dh }
t }|D ]tfdd	|D rg| qU||8 }|D ])d
 |v r~|d
  n|fdd|
D  d |v r|d  qnt|S )Nc                    s:   |   D ]\}}t|tjjr||j  || qd S N)named_childrenr+   r   nnLinearaddname)r4   layersrE   module)_find_hqq_quantizable_layersr   r   rH   k   s
   zIHqqHfQuantizer.update_expected_keys.<locals>._find_hqq_quantizable_layersskip_modulesr   Flinear_layerquant_configr   r   del_origr   c                 3       | ]}| v V  qd S r@   r   )r8   _module)r9   r   r   	<genexpr>       z6HqqHfQuantizer.update_expected_keys.<locals>.<genexpr>z.weightc                    s   h | ]} d  | qS ).r   )r8   _ref_key)rO   r   r   	<setcomp>   r;   z6HqqHfQuantizer.update_expected_keys.<locals>.<setcomp>z.bias)r<   r/   named_modulesrE   configr0   rD   r   r   float16r$   anyupdatelist)r   r4   r>   r?   new_keysrE   rG   _valid_modules_skipped_modules_skip_module	_ref_keys_rm_keysr   )rH   rO   r9   r   update_expected_keysd   sN   

	
z#HqqHfQuantizer.update_expected_keys
param_namec                 K   s   t ||\}}t|tjjS r@   )r   r+   r   rB   rC   )r   r4   rb   r1   rG   _r   r   r   param_needs_quantization   s   z'HqqHfQuantizer.param_needs_quantizationparam_valueztorch.Tensortarget_deviceztorch.devicec                    s.  t ||\}|ddd }t ||\}}	|jjd }
|jjd }tfdd|D r?j||j|| jdid	d
d d S | jrt	| dsLt
t| _| j| ||i | j|  t fdd| jD rd v spjd u rtd d | j|d	d}|  |jd urt|jtjrtj|j|_| jr| |}t||	| | j|= d S j||id	d
d jjjdkoĈjd u pĈjjjdk}|rdjddd  }d|
v r|
}n||
v r|
| }t|| j|d
d}|jd urt|jtjrtj|j|_| jr| |}t||	| d S d S )NrR   r	   r   rL   rI   c                 3   s    | ]}| j v V  qd S r@   )rE   )r8   skip_module)rG   r   r   rP      s    z8HqqHfQuantizer.create_quantized_param.<locals>.<genexpr>)r   r   FT)strictassign
hqq_paramsc                 3   rN   r@   r   )r8   k)rj   r   r   rP      rQ   r   rJ   metaweight_quant_params)rL   r   r   rM   ) r   rsplitrV   r0   rX   load_state_dicttor   r<   hasattrr   r,   rj   rY   allr%   r   r   r+   r   TensorrB   	Parameterr#   _patch_layer_for_multigpusetattrr   r   typejoinrE   split)r   r4   re   rb   rf   r1   tensor_namemodule_nameparent_modulenoderL   rI   	hqq_layermodule_is_ready
module_tagmodule_quant_configr   )rj   rG   r   create_quantized_param   sl   


*



z%HqqHfQuantizer.create_quantized_paramc                    s   dd   fdd_ S )Nc                 S   s4   t || j|   }| jd ur|| j7 }|S r@   )r   matmulrq   r   
dequantizetr   )r   xoutr   r   r   forward_with_device   s   

zEHqqHfQuantizer._patch_layer_for_multigpu.<locals>.forward_with_devicec                    s
    | S r@   r   )r   r   r   r   r   <lambda>   s   
 z:HqqHfQuantizer._patch_layer_for_multigpu.<locals>.<lambda>)forward)r   r   r   r   r   rv      s   z(HqqHfQuantizer._patch_layer_for_multigpuc                 K   s   t || jd}d S )N)r0   )r   r0   r   r4   r1   r   r   r   $_process_model_before_weight_loading  s   z3HqqHfQuantizer._process_model_before_weight_loadingc                 K   s   d|_ |  |_|S NT)is_hqq_quantizedis_serializableis_hqq_serializabler   r   r   r   #_process_model_after_weight_loading
  s   
z2HqqHfQuantizer._process_model_after_weight_loadingNc                 C      dS r   r   )r   safe_serializationr   r   r   r     s   zHqqHfQuantizer.is_serializablec                 C   r   r   r   r   r   r   r   is_trainable  s   zHqqHfQuantizer.is_trainable)r4   r   r@   )__name__
__module____qualname____doc__use_keep_in_fp32_modules requires_parameters_quantizationrequires_calibrationrequired_packagesr"   rZ   strr=   ra   boolrd   r   rv   r   r   r   propertyr   __classcell__r   r   r2   r   r   .   sV    "
	
;
R


	
r   )collectionsr   typingr   integrationsr   utilsr   r   r   baser
   quantizers_utilsr   modeling_utilsr   r   hqq.core.quantizer   r   r   
get_loggerr   r)   r   r   r   r   r   <module>   s"   

