o
    ei5                     @   s   d dl mZmZ ddlmZ ddlmZ erddlmZ ddl	m
Z
mZmZmZmZ ddlmZ e r7d d	lZeeZG d
d deZd	S )    )TYPE_CHECKINGOptional   )HfQuantizer)get_module_from_name   )PreTrainedModel)is_fp_quant_availableis_qutlass_availableis_torch_availableis_torch_xpu_availablelogging)QuantizationConfigMixinNc                       s   e Zd ZdZdZdZdef fddZdd ZdddZ	ddde
defddZ		dddZed ded fddZdd Zdd Zdd Z  ZS )!FPQuantHfQuantizerz
    Quantizer for the FP-Quant method. Enables the loading of prequantized models and in-flight quantization of full-precision models.
    FTquantization_configc                    s   t  j|fi | d S N)super__init__)selfr   kwargs	__class__ h/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/transformers/quantizers/quantizer_fp_quant.pyr   )   s   zFPQuantHfQuantizer.__init__c                 K   s   t j st stdt s| jjstd| jjr t	
d t s'td|d u r3| jjs3tdt|trR| jjsHt|dkrHd| v sNd| v rTtd	d S d S )
Nz]FPQuant quantization is only supported on GPU or Intel XPU. Please use a different quantizer.a  Using `fp_quant` with real quantization requires a **Blackwell GPU** and qutlass: `git clone https://github.com/IST-DASLab/qutlass.git && cd qutlass && pip install --no-build-isolation .`. You can use `FPQuantConfig(pseudoquantization=True, ...)` to use Triton-based pseudo-quantization. It doesn't provide any speedups but emulates the quantization behavior of the real quantization.zUsing pseudo-quantization for FP-Quant. This doesn't provide any speedups but emulates the quantization behavior of the real quantization.zGUsing `fp_quant` quantization requires fp_quant: `pip install fp_quant`zyYou are attempting to load a FPQuant model without setting device_map. Please set device_map comprised of 'cuda' devices.r   cpudiskzYou are attempting to load a FPQuant model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.)torchcudais_availabler   NotImplementedErrorr
   r   pseudoquantizationImportErrorloggerwarningr	   
ValueError
isinstancedictlenvalues)r   
device_mapr   r   r   r   validate_environment,   s:   
z'FPQuantHfQuantizer.validate_environmentdtypetorch.dtypereturnc                 C   s&   |t jkrtd| d t j}|S )NzSetting dtype to zP, but only bfloat16 is supported right now. Overwriting torch_dtype to bfloat16.)r   bfloat16r"   warning_once)r   r+   r   r   r   update_dtypeP   s   

zFPQuantHfQuantizer.update_dtypemodelr   
param_namec                 K   s4   ddl m} t||\}}t||r|dv rdS dS )Nr   )FPQuantLinear)weightqweightdqweightTF)fp_quantr3   r   r%   )r   r1   r2   r   r3   moduletensor_namer   r   r   param_needs_quantizationX   s
   z+FPQuantHfQuantizer.param_needs_quantizationc                 K   s.   ddl m} ddlm} |||| jd d S )Nr   )replace_with_fp_quant_linearr   )adapt_fp_quant_config)fp_quant_linear_config)r7   r;   integrations.fp_quantr<   r   )r   r1   r   r;   r<   r   r   r   $_process_model_before_weight_loadingb   s   
z7FPQuantHfQuantizer._process_model_before_weight_loadingNc                 C   s   | j j}|std |S )NzYou are attempting to train a model with FPQuant quantization. This is only supported when `store_master_weights=True`. Please set `store_master_weights=True` to train the model.)r   store_master_weightsr"   r#   )r   r1   	trainabler   r   r   is_trainablep   s   zFPQuantHfQuantizer.is_trainablec                 C   s   dS )NTr   )r   r   r   r   is_serializabley   s   z"FPQuantHfQuantizer.is_serializablec                 C   s   ddl m} || S )Nr   )FpQuantQuantize)r>   rD   )r   rD   r   r   r   get_quantize_ops|   s   z#FPQuantHfQuantizer.get_quantize_opsc                 C   sZ   ddl m} ddlm} | jr+| jjr|dgd|| gdgS |dgd|| gdgS g S )Nr   )WeightConverter)FpQuantDeserializez	.dqweight)source_patternstarget_patterns
operationsz.qweight)core_model_loadingrF   r>   rG   pre_quantizedr   r    )r   rF   rG   r   r   r   get_weight_conversions   s"   	z)FPQuantHfQuantizer.get_weight_conversions)r+   r,   r-   r,   )r1   r   r   )__name__
__module____qualname____doc__requires_calibrationis_qat_trainabler   r   r*   r0   strboolr:   r?   propertyr   rB   rC   rE   rM   __classcell__r   r   r   r   r   !   s     
$

r   )typingr   r   baser   quantizers_utilsr   modeling_utilsr   utilsr	   r
   r   r   r   utils.quantization_configr   r   
get_loggerrN   r"   r   r   r   r   r   <module>   s   
