o
    iU                      @   s   d dl mZmZ ddlmZ ddlmZ ddlmZ er"ddl	m
Z
 ddlmZmZmZmZmZ dd	lmZ e r=d d
lZeeZG dd deZd
S )    )TYPE_CHECKINGOptional   )tqdm   )HfQuantizer)get_module_from_name)PreTrainedModel)is_accelerate_availableis_flute_availableis_hadamard_availableis_torch_availablelogging)QuantizationConfigMixinNc                       s   e Zd ZdZdZdZddgZdef fddZd	d
 Z	d+ddZ
dddddeddfddZ	d,dddeee  fddZd-ddZdee d edee fd!d"Zedefd#d$Zd,d%d&Zdddedefd'd(Zd)d* Z  ZS ).HiggsHfQuantizerz
    Quantizer of the HIGGS method. Enables the loading of prequantized models and in-flight quantization of full-precision models.
    FTzflute-kernelfast_hadamard_transformquantization_configc                    s   t  j|fi | || _d S N)super__init__r   )selfr   kwargs	__class__ d/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/transformers/quantizers/quantizer_higgs.pyr   +   s   
zHiggsHfQuantizer.__init__c                 K   s~   t j s	tdt stdt stdt std|d u r&tdt	|t
r;d| v s7d| v r=tdd S d S )	NzNHIGGS quantization is only supported on GPU. Please use a different quantizer.zHUsing `higgs` quantization requires Accelerate: `pip install accelerate`zLUsing `higgs` quantization requires FLUTE: `pip install flute-kernel>=0.3.0`zbUsing `higgs` quantization requires fast_hadamard_transform: `pip install fast_hadamard_transform`zwYou are attempting to load a HIGGS model without setting device_map. Please set device_map comprised of 'cuda' devices.cpudiskzYou are attempting to load a HIGGS model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.)torchcudais_availableNotImplementedErrorr
   ImportErrorr   r   
ValueError
isinstancedictvalues)r   
device_mapr   r   r   r   validate_environment/   s&   
"z%HiggsHfQuantizer.validate_environmentdtypetorch.dtypereturnc                 C   sD   |d u rt d tj}|S |tjkr |tjkr td| d|S )NzG`dtype` is None. Setting `dtype=torch.float16` for FLUTE compatibility.zInvalid `dtype` zS. HIGGS quantization only supports `dtype=torch.float16` or `dtype=torch.bfloat16`.)loggerinfor   float16bfloat16r#   )r   r)   r   r   r   update_dtypeI   s   

zHiggsHfQuantizer.update_dtypemodelr	   param_valueztorch.Tensor
param_nametarget_deviceztorch.devicec                 K   s   ddl m} |||| jj| jj| jj| jj}~t||\}}	d	|
dd d }
| D ]=\}}||jv rFtjj|dd|j|< q1||jv rUtj||j|< q1|dkre||_| | jj|
< q1td| d	| d S )
Nr   )quantize_with_higgs.F)requires_gradtune_metadatazUnexpected key z in module )integrationsr5   tor   bitsp
group_sizehadamard_sizer   joinsplititems_parametersr   nn	Parameter_buffersBufferr9   to_dictr#   )r   r1   r2   r3   r4   r   r5   
flute_dictmodule_module_namekeyvaluer   r   r   create_quantized_paramT   s*   

z'HiggsHfQuantizer.create_quantized_paramNkeep_in_fp32_modulesc                 K   s@   ddl m} | || jj|| _||| j| jd | j|j_d S )Nr   )replace_with_higgs_linear)r   modules_to_not_convert)r:   rQ   get_modules_to_not_convertr   rR   config)r   r1   rP   r   rQ   r   r   r   $_process_model_before_weight_loadingt   s   
z5HiggsHfQuantizer._process_model_before_weight_loadingc           
         s   ddl m}m} ddlm} ddlm  i } fdd| D }t|	 dd	d
D ]@\}}	|	j
j|vr?||	j
jd||	j
j< ||	j
j |	_|| jj| |	_||	j
j|	jj|	jd\|	j
_|	_|	j | jj|< q*d S )Nr   )TuneMetaDatamaybe_tune_and_repack)make_workspace_streamkr   HiggsLinearc                    s    i | ]\}}t | r||qS r   r$   .0namerJ   rY   r   r   
<dictcomp>   s     zHHiggsHfQuantizer._process_model_after_weight_loading.<locals>.<dictcomp>zRepacking HIGGS modulesF)descleave)device)weightscalesmetadata)
flute.tunerV   rW   flute.utilsrX   r:   rZ   named_modulesr   rB   rc   rb   	workspace	from_dictr   r9   datard   rH   )
r   r1   r   rV   rW   rX   flute_workspacesflute_modulesr^   rJ   r   rY   r   #_process_model_after_weight_loading   s"   z4HiggsHfQuantizer._process_model_after_weight_loadingmissing_keysprefixc                    sL   ddl m   fdd| D dtdtffddfd	d
|D S )Nr   rY   c                    s   h | ]\}}t | r|qS r   r[   r\   rY   r   r   	<setcomp>   s    z7HiggsHfQuantizer.update_missing_keys.<locals>.<setcomp>rM   r+   c                    s>    ds
 drdS  d  t fddD S )Nz.weightz.biasFr6   c                 3   s     | ]}|v p| v V  qd S r   r   )r]   r^   full_keyrM   r   r   	<genexpr>   s    zNHiggsHfQuantizer.update_missing_keys.<locals>.should_update.<locals>.<genexpr>)endswithany)rM   )higgs_namesrp   rr   r   should_update   s   z;HiggsHfQuantizer.update_missing_keys.<locals>.should_updatec                    s   g | ]} |s|qS r   r   )r]   rM   )rx   r   r   
<listcomp>   s    z8HiggsHfQuantizer.update_missing_keys.<locals>.<listcomp>)r:   rZ   rh   strbool)r   r1   ro   rp   r   )rZ   rw   rp   rx   r   update_missing_keys   s   z$HiggsHfQuantizer.update_missing_keysc                 C      dS )NFr   )r   r   r   r   is_trainable   s   zHiggsHfQuantizer.is_trainablec                 C   r}   )NTr   )r   safe_serializationr   r   r   is_serializable   s   z HiggsHfQuantizer.is_serializablec                 K   s4   ddl m} t||\}}t||r|dkrdS dS )Nr   rY   rc   TF)r:   rZ   r   r$   )r   r1   r3   r   rZ   rJ   tensor_namer   r   r   param_needs_quantization   s
   z)HiggsHfQuantizer.param_needs_quantizationc                 C   s   ddl m} ||}|S )Nr   )dequantize_higgs)r:   r   )r   r1   r   r   r   r   _dequantize   s   zHiggsHfQuantizer._dequantize)r)   r*   r+   r*   r   )r1   r	   )__name__
__module____qualname____doc__requires_calibration requires_parameters_quantizationrequired_packagesr   r   r(   r0   rz   rO   r   listrU   rn   r|   propertyr{   r~   r   r   r   __classcell__r   r   r   r   r   "   s<    

#




r   )typingr   r   utils.loggingr   baser   quantizers_utilsr   modeling_utilsr	   utilsr
   r   r   r   r   utils.quantization_configr   r   
get_loggerr   r,   r   r   r   r   r   <module>   s   
