o
    	Û·iü  ã                   @   st   d dl mZ ddlmZ erddlmZ ddlmZmZm	Z	 ddl
mZ eƒ r+d dlZe	 e¡ZG d	d
„ d
eƒZdS )é    )ÚTYPE_CHECKINGé   )ÚHfQuantizeré   )ÚPreTrainedModel)Úis_auto_round_availableÚis_torch_availableÚlogging)ÚQuantizationConfigMixinNc                       sr   e Zd ZdZdZdgZdef‡ fdd„Zdd„ Zddd„Z	ddd„Z
ddd„Zedefdd„ƒZddd„Z‡  ZS )ÚAutoRoundQuantizerzW
    Quantizer of the AutoRound method. (https://huggingface.co/papers/2309.05516)
    TÚ
auto_roundÚquantization_configc                    s   t ƒ j|fi |¤Ž d S ©N)ÚsuperÚ__init__)Úselfr   Úkwargs©Ú	__class__© úb/home/ubuntu/vllm_env/lib/python3.10/site-packages/transformers/quantizers/quantizer_auto_round.pyr   )   s   zAutoRoundQuantizer.__init__c                 O   s   |  d¡| _tƒ stdƒ‚d S )NÚ
device_mapzbLoading an AutoRound quantized model requires auto-round library (`pip install 'auto-round>=0.5'`))Úgetr   r   ÚImportError)r   Úargsr   r   r   r   Úvalidate_environment,   s   ÿÿz'AutoRoundQuantizer.validate_environmentÚdtypeútorch.dtypeÚreturnc                 C   s   |d u rt j}t d¡ |S )NzMLoading the model in `torch.bfloat16`. To overwrite it, set `dtype` manually.)ÚtorchÚbfloat16ÚloggerÚinfo)r   r   r   r   r   Úupdate_dtype3   s   
zAutoRoundQuantizer.update_dtypeÚmodelr   c                 K   sR   |j jdkrt d¡ ddlm}m} | jr'|| jƒ}|||ƒ\}}|| _	d S d S )NÚ	input_idszRAutoRound offers only limited support for models that are not strictly text-based.r   )Úconvert_hf_modelÚinfer_target_device)
r   Úmain_input_namer!   ÚwarningÚ"auto_round.inference.convert_modelr&   r'   Úpre_quantizedr   Úused_backends)r   r$   r   r&   r'   Útarget_devicer,   r   r   r   Ú$_process_model_before_weight_loading9   s   


ýz7AutoRoundQuantizer._process_model_before_weight_loadingc                 K   s*   | j rddlm} ||| jƒ d S tdƒ‚)Nr   )Ú	post_initz+AutoRound only sports pre-quantized models.)r+   r*   r/   r,   Ú
ValueError)r   r$   r   r/   r   r   r   Ú#_process_model_after_weight_loadingC   s   z6AutoRoundQuantizer._process_model_after_weight_loadingc                 C   ó   dS )NFr   )r   r   r   r   Úis_trainableK   ó   zAutoRoundQuantizer.is_trainableNc                 C   r2   )NTr   )r   Úsafe_serializationr   r   r   Úis_serializableO   r4   z"AutoRoundQuantizer.is_serializable)r   r   r   r   )r$   r   r   )Ú__name__Ú
__module__Ú__qualname__Ú__doc__Úrequires_calibrationÚrequired_packagesr
   r   r   r#   r.   r1   ÚpropertyÚboolr3   r6   Ú__classcell__r   r   r   r   r       s    



r   )Útypingr   Úbaser   Úmodeling_utilsr   Úutilsr   r   r	   Úutils.quantization_configr
   r   Ú
get_loggerr7   r!   r   r   r   r   r   Ú<module>   s   
