o
    *i                     @   s   d dl Z d dlmZmZ d dlZd dlmZ d dlmZm	Z	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ G dd deZdS )    N)DictTuple)BaseCompressor)QuantizationSchemeQuantizationStatus"initialize_module_for_quantization)register_offload_parameter)get_execution_device)Tensor)	Parameter)linear)Linearc                       sV   e Zd ZdZd fddZee dede	de
fd	d
ZdedefddZ  ZS )CompressedLinearz
    Wrapper module for running a compressed forward pass of a quantized Linear module.
    The wrapped layer will decompressed on each forward call.

    returnNc                    s"   t  j|i | tdt d S )NzXCompressedLinear should not be initialized directly. Use the from_linear method instead.)super__init__warningswarnUserWarning)selfargskwargs	__class__ h/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/compressed_tensors/linear/compressed_linear.pyr   (   s
   zCompressedLinear.__init__modulequantization_schemequantization_formatc           
      C   s   t |_t||_t|}t||dd |j|jj	|j
}t|d | D ]\}\}}ttj|||ddd}	t|||	 q'tj|_t|drQt j|t |_|S )a  
        :param module: dense linear module to replace
        :param quantization_scheme: quantization config for the module to wrap
        :param quantization_format: compression format module is stored as
        :return: CompressedLinear module wrapping the input module
        F)force_zero_pointweight)devicedtyperequires_grad_old_forward)r   r   r   load_from_registry
compressorr	   r   compression_param_infor    shapeweightsdelattritemsr   torchemptyr   r   
COMPRESSEDquantization_statushasattrforward__get__r%   )
clsr   r   r   init_devicecompression_paramsnamer)   r"   paramr   r   r   from_linear0   s*   


zCompressedLinear.from_linearinputc                 C   sH   | j tjkr| j| }t|dd}t| d| tj| _ t|| j	| j
S )zM
        Decompresses the weight, then runs the wrapped forward pass
        Fr#   r    )r0   r   r/   r'   decompress_moduler   r   FROZENr   r    bias)r   r:   weight_datar8   r   r   r   r2   b   s   zCompressedLinear.forward)r   N)__name__
__module____qualname____doc__r   classmethodr-   no_gradr   r   strr9   r
   r2   __classcell__r   r   r   r   r   !   s    0r   )r   typingr   r   r-   #compressed_tensors.compressors.baser   compressed_tensors.quantizationr   r   r   compressed_tensors.utilsr    compressed_tensors.utils.offloadr	   r
   torch.nnr   torch.nn.functionalr   torch.nn.modulesr   r   r   r   r   r   <module>   s   