o
    i|                     @   s   d dl mZmZ ddlmZ ddlmZ erddlmZ ddl	m
Z
mZmZmZ ddlmZ e r5d d	lZeeZG d
d deZd	S )    )TYPE_CHECKINGOptional   )HfQuantizer)get_module_from_name   )PreTrainedModel)is_fp_quant_availableis_qutlass_availableis_torch_availablelogging)QuantizationConfigMixinNc                       s   e Zd ZdZdZdZdZdgZdef fddZ	dd	 Z
d'ddZdddddeddfddZ		d(ddZd(ddZdee dedee fddZed)ded fd!d"Zd)d#d$Zdddedefd%d&Z  ZS )*FPQuantHfQuantizerz
    Quantizer for the FP-Quant method. Enables the loading of prequantized models and in-flight quantization of full-precision models.
    FTfp_quantquantization_configc                    s   t  j|fi | || _d S N)super__init__r   )selfr   kwargs	__class__ g/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/transformers/quantizers/quantizer_fp_quant.pyr   +   s   
zFPQuantHfQuantizer.__init__c                 K   s   t j s	tdt s| jjstd| jjrt	d t
 s$td|d u r0| jjs0tdt|trId| v sAd| v rM| jjsKtdd S d S d S )	NzPFPQuant quantization is only supported on GPU. Please use a different quantizer.a  Using `fp_quant` with real quantization requires a **Blackwell GPU** and qutlass: `git clone https://github.com/IST-DASLab/qutlass.git && cd qutlass && pip install --no-build-isolation .`. You can use `FPQuantConfig(pseudoquantization=True, ...)` to use Triton-based pseudo-quantization. It doesn't provide any speedups but emulates the quantization behavior of the real quantization.zUsing pseudo-quantization for FP-Quant. This doesn't provide any speedups but emulates the quantization behavior of the real quantization.zGUsing `fp_quant` quantization requires fp_quant: `pip install fp_quant`zyYou are attempting to load a FPQuant model without setting device_map. Please set device_map comprised of 'cuda' devices.cpudiskzYou are attempting to load a FPQuant model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.)torchcudais_availableNotImplementedErrorr
   r   pseudoquantizationImportErrorloggerwarningr	   
ValueError
isinstancedictvalues)r   
device_mapr   r   r   r   validate_environment/   s8   
z'FPQuantHfQuantizer.validate_environmentdtypetorch.dtypereturnc                 C   s:   |d u rt d tj}|S |tjkrtd| d|S )NzJ`dtype` is None. Setting `dtype=torch.bfloat16` for qutlass compatibility.zInvalid `dtype` z=. fp_quant quantization only supports `dtype=torch.bfloat16`.)r"   infor   bfloat16r$   )r   r*   r   r   r   update_dtypeQ   s   

zFPQuantHfQuantizer.update_dtypemodelr   param_valueztorch.Tensor
param_nametarget_deviceztorch.devicec                 K   s   t ||\}}|dr tjj||dd|_d |_d |_d S |dr:tj|||_d |_d |_d |_	d S tj|||_|
  d S )Nz.qweightF)requires_gradz	.dqweight)r   endswithr   nn	Parametertoqweightweightdqweightscalespre_forward)r   r0   r1   r2   r3   r   module_r   r   r   create_quantized_paramZ   s"   

z)FPQuantHfQuantizer.create_quantized_paramc                 K   s8   ddl m} ddlm} |||| jd | j|j_d S )Nr   )replace_with_fp_quant_linearr   )adapt_fp_quant_config)fp_quant_linear_config)r   rA   integrations.fp_quantrB   r   config)r   r0   r   rA   rB   r   r   r   $_process_model_before_weight_loading   s   z7FPQuantHfQuantizer._process_model_before_weight_loadingc                 K   s   |S r   r   )r   r0   r   r   r   r   #_process_model_after_weight_loading      z6FPQuantHfQuantizer._process_model_after_weight_loadingmissing_keysprefixc                    sL   ddl m   fdd| D dtdtffddfd	d
|D S )Nr   FPQuantLinearc                    s   h | ]\}}t | r|qS r   )r%   ).0namer>   rK   r   r   	<setcomp>   s    z9FPQuantHfQuantizer.update_missing_keys.<locals>.<setcomp>keyr,   c                    s>    ds
 drdS  d  t fddD S )Nz.weightz.biasF.c                 3   s     | ]}|v p| v V  qd S r   r   )rM   rN   full_keyrP   r   r   	<genexpr>   s    zQFPQuantHfQuantizer.update_missing_keys.<locals>.should_exclude.<locals>.<genexpr>)r5   any)rP   )fp_quant_namesrJ   rR   r   should_exclude   s   z>FPQuantHfQuantizer.update_missing_keys.<locals>.should_excludec                    s   g | ]} |s|qS r   r   )rM   rP   )rW   r   r   
<listcomp>   s    z:FPQuantHfQuantizer.update_missing_keys.<locals>.<listcomp>)r   rL   named_modulesstrbool)r   r0   rI   rJ   r   )rL   rV   rJ   rW   r   update_missing_keys   s   z&FPQuantHfQuantizer.update_missing_keysNc                 C   s   | j j}|std |S )NzYou are attempting to train a model with FPQuant quantization. This is only supported when `store_master_weights=True`. Please set `store_master_weights=True` to train the model.)r   store_master_weightsr"   r#   )r   r0   	trainabler   r   r   is_trainable   s   zFPQuantHfQuantizer.is_trainablec                 C   s   dS )NTr   )r   safe_serializationr   r   r   is_serializable   rH   z"FPQuantHfQuantizer.is_serializablec                 K   s4   ddl m} t||\}}t||r|dv rdS dS )Nr   rK   )r:   r9   r;   TF)r   rL   r   r%   )r   r0   r2   r   rL   r>   tensor_namer   r   r   param_needs_quantization   s
   z+FPQuantHfQuantizer.param_needs_quantization)r*   r+   r,   r+   )r0   r   r   )__name__
__module____qualname____doc__requires_calibration requires_parameters_quantizationis_qat_trainablerequired_packagesr   r   r)   r/   rZ   r@   rF   rG   listr\   propertyr   r_   ra   r[   rc   __classcell__r   r   r   r   r   !   s6    
"	
&


r   )typingr   r   baser   quantizers_utilsr   modeling_utilsr   utilsr	   r
   r   r   utils.quantization_configr   r   
get_loggerrd   r"   r   r   r   r   r   <module>   s   
