o
    ei                     @   s   d dl mZ ddlmZ ddlmZ erddlmZ ddlm	Z	m
Z
mZmZmZmZmZmZmZ e r=d dlZdd	lmZ eeZG d
d deZdS )    )TYPE_CHECKING   )HfQuantizer)get_module_from_name   )PreTrainedModel)	ACCELERATE_MIN_VERSIONBITSANDBYTES_MIN_VERSIONis_accelerate_availableis_bitsandbytes_availableis_torch_availableis_torch_hpu_availableis_torch_npu_availableis_torch_xpu_availableloggingN)WeightConverterc                       s   e Zd ZdZdZ fddZdd Zddd	ed
ddef fddZ	ddd	ede
fddZdeeeeB f deeeeB f fddZdd Z		d%ddZd%ddZdd Zede
fddZd&dd Zd!d" Zd#d$ Z  ZS )'Bnb4BitHfQuantizerzB
    4-bit quantization from bitsandbytes quantization method
    Fc                    s   t  j|fi | d S N)super__init__)selfquantization_configkwargs	__class__ h/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/transformers/quantizers/quantizer_bnb_4bit.pyr   3   s   zBnb4BitHfQuantizer.__init__c                 O   s   t  stdt dt stdt dddlm} |dd |d	}| jj	sFt
|trHt| }|d
hkrJd
|v sBd|v rLtdd S d S d S d S )NzWUsing `bitsandbytes` 4-bit quantization requires accelerate: `pip install 'accelerate>=z'`z]Using `bitsandbytes` 4-bit quantization requires bitsandbytes: `pip install -U bitsandbytes>=`r   )!validate_bnb_backend_availabilityT)raise_exception
device_mapcpudiska  Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. )r
   ImportErrorr   r   r	   integrationsr   getr    llm_int8_enable_fp32_cpu_offload
isinstancedictsetvalues
ValueError)r   argsr   r   r    r*   r   r   r   validate_environment6   s&   



z'Bnb4BitHfQuantizer.validate_environmentmodelr   
param_nameparamztorch.Tensorreturnc                    s    |  ||rdS t |||S )z4Return the element size (in bytes) for `param_name`.g      ?)param_needs_quantizationr   param_element_size)r   r.   r/   r0   r   r   r   r3   Q   s   z%Bnb4BitHfQuantizer.param_element_sizec                 K   s,   dd l }t||\}}t||jjo|dkS )Nr   bias)bitsandbytesr   r'   nn
Linear4bit)r   r.   r/   r   bnbmodulenamer   r   r   r2   Y   s   z+Bnb4BitHfQuantizer.param_needs_quantization
max_memoryc                 C   s   dd |  D }|S )Nc                 S   s   i | ]	\}}||d  qS )g?r   ).0keyvalr   r   r   
<dictcomp>a   s    z8Bnb4BitHfQuantizer.adjust_max_memory.<locals>.<dictcomp>)items)r   r;   r   r   r   adjust_max_memory_   s   z$Bnb4BitHfQuantizer.adjust_max_memoryc                 C   s   |d u rEt j rdt j i}n+t rddt j  i}nt r-ddt j  i}nt r8dt j	 i}nddi}t
d| d |S )N znpu:zhpu:r!   z:The device_map was not initialized. Setting device_map to zL. If you want to use the model for inference, please set device_map ='auto' )torchcudais_availablecurrent_devicer   npur   hpur   xpuloggerinfo)r   r    r   r   r   update_device_mapd   s    
z$Bnb4BitHfQuantizer.update_device_mapc                 K   sl   ddl m} | || jj|j| _| jjr)t|t	r)dd |
 D }| j| ||| j| j| jd}d S )Nr   )replace_with_bnb_linearc                 S   s   g | ]
\}}|d v r|qS ))r"   r!   r   )r<   r=   valuer   r   r   
<listcomp>   s    zKBnb4BitHfQuantizer._process_model_before_weight_loading.<locals>.<listcomp>)modules_to_not_convertr   pre_quantized)r$   rM   get_modules_to_not_convertr   llm_int8_skip_modules_keep_in_fp32_modulesrP   r&   r'   r(   r@   extendrQ   )r   r.   r    r   rM   keys_on_cpur   r   r   $_process_model_before_weight_loadingw   s   

z7Bnb4BitHfQuantizer._process_model_before_weight_loadingc                 K   s   d|_ |  |_|S NT)is_loaded_in_4bitis_serializableis_4bit_serializable)r   r.   r   r   r   r   #_process_model_after_weight_loading   s   
z6Bnb4BitHfQuantizer._process_model_after_weight_loadingc                 C      dS rX   r   r   r   r   r   rZ      s   z"Bnb4BitHfQuantizer.is_serializablec                 C   r]   rX   r   r^   r   r   r   is_trainable   s   zBnb4BitHfQuantizer.is_trainableNc                 C   s    ddl m} ||| j|d}|S )Nr   )dequantize_and_replace)r   dtype)r$   r`   r   )r   r.   ra   r`   r   r   r   _dequantize   s   zBnb4BitHfQuantizer._dequantizec                 C   s   ddl m} || S )Nr   )Bnb4bitQuantize)integrations.bitsandbytesrc   )r   rc   r   r   r   get_quantize_ops   s   z#Bnb4BitHfQuantizer.get_quantize_opsc                 C   s0   ddl m} | jrtg dd|| gdgS g S )Nr   )Bnb4bitDeserialize)zweight.nested_absmaxzweight.nested_quant_mapzweight.quant_mapzweight.absmaxz$weight.quant_state.bitsandbytes__nf4z$weight.quant_state.bitsandbytes__fp4weightrg   )source_patternstarget_patterns
operations)rd   rf   rQ   r   )r   rf   r   r   r   get_weight_conversions   s   	z)Bnb4BitHfQuantizer.get_weight_conversions)r.   r   r   )__name__
__module____qualname____doc__requires_calibrationr   r-   strfloatr3   boolr2   r(   intrA   rL   rW   r\   rZ   propertyr_   rb   re   rk   __classcell__r   r   r   r   r   ,   s&    *


r   )typingr   baser   quantizers_utilsr   modeling_utilsr   utilsr   r	   r
   r   r   r   r   r   r   rC   core_model_loadingr   
get_loggerrl   rJ   r   r   r   r   r   <module>   s   ,
