o
    d۷i$                     @   s   d dl mZmZ d dlmZmZmZmZmZ d dl	Z	d dl
mZ d dlmZmZ d dlmZ d dlmZ d dl	mZ d d	lmZ d
gZG dd
 d
eeZdS )    )ABCabstractmethod)Dict	GeneratorOptionalTupleUnionN)SparsityCompressionConfig)QuantizationArgsQuantizationConfig)RegistryMixin)has_offloaded_params)Tensor)ModuleBaseCompressorc                   @   s6  e Zd ZdZ	d deeedf fddZ	d dej	de
e deeeej	ejf f fd	d
Zeedee fddZedeeef deeef fddZe	d!dededeeeef ddf fddZdede
eeejf  fddZdedeeejf fddZdefddZdeeef dejfddZdS )"r   a  
    Base class representing a model compression algorithm. Each child class should
    implement compression_param_info, compress_weight and decompress_weight.

    Compressors support compressing/decompressing a full module state dict or a single
    quantized PyTorch leaf module.

    Model Load Lifecycle (run_compressed=False):
        - ModelCompressor.decompress()
            - apply_quantization_config()
            - BaseCompressor.decompress()

    Model Save Lifecycle:
        - ModelCompressor.compress()
            - BaseCompressor.compress()


    Module Lifecycle (run_compressed=True):
        - apply_quantization_config()
        - compressed_module = CompressedLinear(module)
            - initialize_module_for_quantization()
            - BaseCompressor.compression_param_info()
            - register_parameters()
        - compressed_module.forward()
            -compressed_module.decompress()


    :param config: config specifying compression parameters
    Nconfigc                 C   s
   || _ d S N)r   )selfr    r   Y/home/ubuntu/vllm_env/lib/python3.10/site-packages/compressed_tensors/compressors/base.py__init__=   s   
zBaseCompressor.__init__weight_shapequantization_argsreturnc                 C      t  )aY  
        Creates a dictionary of expected shapes and dtypes for each compression
            parameter used by the compressor

        :param weight_shape: uncompressed weight shape
        :param quantization_args: quantization parameters for the weight
        :return: dictionary mapping compressed parameter names to shape and dtype
        NotImplementedError)r   r   r   r   r   r   compression_param_infoB      z%BaseCompressor.compression_param_infoc                 C   r   )zx
        Returns a tuple of compression parameter names introduced by
        the compressor during compression
        r   )r   r   r   r   compression_param_namesQ   s   z&BaseCompressor.compression_param_namesmodel_statec                 K   r   )z
        Compresses a dense state dict

        :param model_state: state dict of uncompressed model
        :param kwargs: additional arguments for compression
        :return: compressed state dict
        r   )r   r    kwargsr   r   r   compressZ   r   zBaseCompressor.compresscpupath_to_model_or_tensorsdevicec                 K   r   )a  
        Reads a compressed state dict located at path_to_model_or_tensors
        and returns a generator for sequentially decompressing back to a
        dense state dict

        :param path_to_model_or_tensors: path to compressed safetensors model (directory
            with one or more safetensors files) or compressed tensors file
        :param names_to_scheme: quantization args for each quantized weight
        :param device: optional device to load intermediate weights into
        :return: compressed state dict
        r   )r   r$   r%   r!   r   r   r   
decompressi   s   zBaseCompressor.decompressmodulec                 C   s^   t |dsdS |j}t |dsdS |j}t|dd}t|dd}t|dd}| j||||dS )a"  
        Compresses a single quantized leaf PyTorch module. If the module is not
        quantized, this function has no effect.

        :param module: PyTorch module to compress
        :return: dictionary of compressed weight data, or None if module is not
            quantized
        quantization_schemeNweightsweightweight_scaleweight_zero_point)r*   scale
zero_pointr   )hasattrr(   r)   getattrcompress_weight)r   r'   r(   r   r*   r+   r,   r   r   r   compress_module}   s   
	
zBaseCompressor.compress_moduler*   c                 K   r   )z
        Compresses a single uncompressed weight

        :param weight: uncompressed weight tensor
        :param kwargs: additional arguments for compression
        r   )r   r*   r!   r   r   r   r1      s   zBaseCompressor.compress_weightc           	      C   sx   t | j}t|rdn|}t|dsdS |j}t|ds dS |j}i }| D ]\}}|||< q)| j||d	|S )a  
        Decompresses a single compressed leaf PyTorch module. If the module is not
        quantized, this function has no effect.

        :param module: PyTorch module to decompress
        :return: tensor of the decompressed weight, or None if module is not quantized
        r#   r(   Nr)   )compressed_datar   )
next
parametersr%   r   r/   r(   r)   named_parametersdecompress_weightto)	r   r'   params_devicer%   r(   r   r3   name	parameterr   r   r   decompress_module   s    	


z BaseCompressor.decompress_moduler3   c                 K   r   )z
        Decompresses a single compressed weight

        :param compressed_data: dictionary of data needed for decompression
        :param kwargs: additional arguments for decompression
        :return: tensor of the decompressed weight
        r   )r   r3   r!   r   r   r   r7      s   
z BaseCompressor.decompress_weightr   )r#   )__name__
__module____qualname____doc__r   r	   r   r   torchSizer   r
   r   strr   dtyper   propertyr   r   r   r"   r   r&   r   r2   r1   r<   r7   r   r   r   r   r      sX    



 

)abcr   r   typingr   r   r   r   r   rA   compressed_tensors.configr	   compressed_tensors.quantizationr
   r   compressed_tensors.registryr   compressed_tensors.utilsr   r   torch.nnr   __all__r   r   r   r   r   <module>   s   