o
    i                     @   sX   d dl mZmZmZ d dlmZ ddlmZ e rddlZe	e
ZG dd deZdS )	   )is_compressed_tensors_availableis_torch_availablelogging)CompressedTensorsConfig   )HfQuantizer    Nc                       s   e Zd ZdZdZdgZdef fddZdd ZdddZ	dd Z
dd Zdd Zedd ZdefddZddefddZ  ZS )CompressedTensorsHfQuantizerz
    Quantizer for the compressed_tensors package.  Loads and restores models to
    quantized state with compressed_tensors
    Tcompressed_tensorsquantization_configc                    sT   t  j|fi | t std|  ddlm} ||| _|j	| _	|| _
d S )NuUsing `compressed_tensors` quantized models requires the compressed-tensors library: `pip install compressed-tensors`r   )ModelCompressor)super__init__r   ImportError	post_initcompressed_tensors.compressorsr   from_compression_config
compressorrun_compressedr   )selfr   kwargsr   	__class__ h/home/ubuntu/.local/lib/python3.10/site-packages/transformers/quantizers/quantizer_compressed_tensors.pyr   $   s   
z%CompressedTensorsHfQuantizer.__init__c                 O   s    t  stdt stdd S )Nr   z;torch is required for using compressed-tensors quantization)r   r   r   )r   argsr   r   r   r   validate_environment7   s   z1CompressedTensorsHfQuantizer.validate_environmentdtypetorch.dtypereturnc                 C   s4   |d u rt d tj}|S |tjkrt d |S )NzELoading model using torch.float16 for compressed-tensors quantizationzZWe suggest you to set `dtype=torch.float16` for better efficiency with compressed_tensors.)loggerinfotorchfloat16)r   r   r   r   r   update_dtypeA   s   


z)CompressedTensorsHfQuantizer.update_dtypec                 K   sH   ddl m} | jj}|||| j | jjs| jjr"| jj|d d S d S )Nr   )apply_quantization_configmodel)compressed_tensors.quantizationr&   r   r   r   is_quantization_compressedis_sparsification_compressedcompress_model)r   r(   r   r&   ct_quantization_configr   r   r   $_process_model_before_weight_loadingI   s   zACompressedTensorsHfQuantizer._process_model_before_weight_loadingc                 K   s,   | j jr| jr| j jr| jj|d dS dS )z3Decompress loaded model if necessary - need for qatr'   N)r   r*   r   r+   r   decompress_model)r   r(   r   r   r   r   #_process_model_after_weight_loadingV   s   z@CompressedTensorsHfQuantizer._process_model_after_weight_loadingc                 C   s>   dddddd}|  d ur|  jd ur|  j| |S )Nlocal_colwiselocal_rowwise)z0layers.*.feed_forward.experts.*.gate_proj.weightz6layers.*.feed_forward.experts.*.gate_proj.weight_scalez.layers.*.feed_forward.experts.*.up_proj.weightz4layers.*.feed_forward.experts.*.up_proj.weight_scalez0layers.*.feed_forward.experts.*.down_proj.weight)get_text_configbase_model_tp_planupdate)r   configadditional_planr   r   r   update_tp_plan^   s   z+CompressedTensorsHfQuantizer.update_tp_planc                 C      dS )NTr   r   r   r   r   is_trainablek      z)CompressedTensorsHfQuantizer.is_trainablec                 C   s   | j  p| jj S )z7Loaded Models can carry out quantization aware training)r   r   r*   r:   r   r   r   is_qat_trainableo   s   z-CompressedTensorsHfQuantizer.is_qat_trainableNc                 C   r9   )z>Models quantized using compressed tensors can be saved to diskTr   )r   safe_serializationr   r   r   is_serializablet   r<   z,CompressedTensorsHfQuantizer.is_serializable)r   r   r    r   )N)__name__
__module____qualname____doc__requires_calibrationrequired_packagesr   r   r   r%   r.   r0   r8   propertyr;   boolr=   r?   __classcell__r   r   r   r   r	      s    


r	   )utilsr   r   r   utils.quantization_configr   baser   r#   
get_loggerr@   r!   r	   r   r   r   r   <module>   s   
