o
    wi                     @   s   d dl Z d dlmZmZmZmZ d dlmZ ddlm	Z	 ddl
mZ er*ddlmZ dd	lmZmZmZmZ dd
lmZ e rCd dlZeeZG dd de	ZdS )    N)TYPE_CHECKINGAnyOptionalUnion)version   )HfQuantizer)get_module_from_name   )PreTrainedModel)is_accelerate_availableis_optimum_quanto_availableis_torch_availablelogging)QuantoConfigc                
       s:  e Zd ZdZddgZdZdZdef fddZd	d
 Z	dd Z
dd Zd4ddZdee dedee fddZdddddedeeef def
ddZd eeeeef f deeeeef f fd!d"Zddddded#d$fd%d&Zd5d(d)Z	*d6ddd+eee  fd,d-Zd.d/ Zed6ded fd0d1Zd6d2d3Z  ZS )7QuantoHfQuantizerz*
    Quantizer for the quanto library
    quanto
accelerateTFquantization_configc                    s    t  j|fi | |   d S N)super__init__	post_init)selfr   kwargs	__class__ e/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/transformers/quantizers/quantizer_quanto.pyr   2   s   zQuantoHfQuantizer.__init__c                 C   s"   | j jdur| jstddS dS )z 
        Safety checker
        NzWe don't support quantizing the activations with transformers library.Use quanto library for more complex use cases such as activations quantization, calibration and quantization aware training.)r   activationspre_quantized
ValueError)r   r   r   r   r   6   s
   zQuantoHfQuantizer.post_initc                 O   s    t  stdt stdd S )NzhLoading an optimum-quanto quantized model requires optimum-quanto library (`pip install optimum-quanto`)z`Loading an optimum-quanto quantized model requires accelerate library (`pip install accelerate`))r   ImportErrorr   )r   argsr   r   r   r   validate_environment@   s   z&QuantoHfQuantizer.validate_environmentc                 C   s   |d u rddi}t d |S )N cpuzThe device_map was not initialized. Setting device_map to {'':'cpu'}. If you want to use the model for inference, please set device_map ='auto')loggerinfo)r   
device_mapr   r   r   update_device_mapJ   s   z#QuantoHfQuantizer.update_device_maptorch_dtypetorch.dtypereturnc                 C   s   |d u rt d tj}|S )NzVYou did not specify `torch_dtype` in `from_pretrained`. Setting it to `torch.float32`.)r'   r(   torchfloat32)r   r+   r   r   r   update_torch_dtypeT   s   
z$QuantoHfQuantizer.update_torch_dtypemissing_keysprefixc                    s   t  r	ddlm} g  | D ]*\}}t||r9|D ]}||v s)|| d| v r8|ds8|ds8 | qq fdd|D S )Nr   QModuleMixin.z.weightz.biasc                    s   g | ]}| vr|qS r   r   ).0knot_missing_keysr   r   
<listcomp>h   s    z9QuantoHfQuantizer.update_missing_keys.<locals>.<listcomp>)r   optimum.quantor4   named_modules
isinstanceendswithappend)r   modelr1   r2   r4   namemodulemissingr   r8   r   update_missing_keysZ   s   

z%QuantoHfQuantizer.update_missing_keysr@   r   param_valueztorch.Tensor
param_name
state_dictc                 K   s   t  r	ddlm} |dd}|dd}|dur:|dur:t| }	|dkr:t|	dkr:|	dhks:|	ddhks:d	S t||\}
}t|
|rNd
|v rN|
j	 S d	S )z=
        Check if a parameter needs to be quantized.
        r   r3   r)   Nparam_devicer&   r   diskFweight)
r   r;   r4   getsetvalueslenr	   r=   frozen)r   r@   rE   rF   rG   r   r4   r)   rH   device_map_valuesrB   tensor_namer   r   r   check_quantized_paramj   s   z'QuantoHfQuantizer.check_quantized_param
max_memoryc                 C   s   dd |  D }|S )Nc                 S   s   i | ]	\}}||d  qS )g?r   )r6   keyvalr   r   r   
<dictcomp>   s    z7QuantoHfQuantizer.adjust_max_memory.<locals>.<dictcomp>)items)r   rS   r   r   r   adjust_max_memory   s   z#QuantoHfQuantizer.adjust_max_memorytarget_deviceztorch.devicec           
      O   s<   ddl m} ||||| t||\}}	|  d|j_dS )ze
        Create the quantized parameter by calling .freeze() after setting it to the module.
        r   )set_module_tensor_to_deviceFN)accelerate.utilsrZ   r	   freezerJ   requires_grad)
r   r@   rE   rF   rY   r#   r   rZ   rB   _r   r   r   create_quantized_param   s
   z(QuantoHfQuantizer.create_quantized_paramtarget_dtypec                 C   sV   t tj dt dkr'ddlm} tj|j|j	|j
d}|| jj }|S td)Nr   z0.27.0r   )CustomDtype)int8float8int4int2zYou are using `device_map='auto'` on an optimum-quanto quantized model. To automatically compute the appropriate device map, you should upgrade your `accelerate` library,`pip install --upgrade accelerate` or install it from source.)r   parse	importlibmetadatar[   ra   r.   rb   FP8INT4INT2r   weightsr!   )r   r`   ra   mappingr   r   r   adjust_target_dtype   s   z%QuantoHfQuantizer.adjust_target_dtypeNkeep_in_fp32_modulesc                 K   sD   ddl m} | || jj|| _||| j| jd\}}| j|j_d S )Nr
   )replace_with_quanto_layers)modules_to_not_convertr   )integrationsrp   get_modules_to_not_convertr   rq   config)r   r@   ro   r   rp   r^   r   r   r   $_process_model_before_weight_loading   s   


z6QuantoHfQuantizer._process_model_before_weight_loadingc                 K   s   |S r   r   )r   r@   r   r   r   r   #_process_model_after_weight_loading      z5QuantoHfQuantizer._process_model_after_weight_loadingc                 C      dS )NTr   )r   r@   r   r   r   is_trainable   s   zQuantoHfQuantizer.is_trainablec                 C   rx   )NFr   )r   safe_serializationr   r   r   is_serializable   rw   z!QuantoHfQuantizer.is_serializable)r+   r,   r-   r,   )r`   r,   r-   r,   r   ) __name__
__module____qualname____doc__required_packages requires_parameters_quantizationrequires_calibrationr   r   r   r$   r*   r0   liststrrD   dictr   boolrR   r   intrX   r_   rn   r   ru   rv   propertyry   r{   __classcell__r   r   r   r   r   )   sV    





2



r   )rg   typingr   r   r   r   	packagingr   baser   quantizers_utilsr	   modeling_utilsr   utilsr   r   r   r   utils.quantization_configr   r.   
get_loggerr|   r'   r   r   r   r   r   <module>   s   
