o
    d۷i2	                     @   sV   d dl Z d dlZd dlmZ d dlmZ d dlmZ dgZe 	e
ZdefddZdS )    N)quantize)QuantizationStatus)Modulecompress_quantized_weightsmodulec                 C   s   t | dd}|r|jsdS t | dd}|tju rdS t | dd}t | dd}t | dd}t | dd}|du s:|du r@tj| _dS d| j_t|||||jtj	d	| j_
tj| _dS )
z
    Quantizes the module weight representation to use fewer bits in memory

    apply to full model with `model.apply(compress_quantized_weights)`

    :param module: module to compress to quantized representation
    quantization_schemeNquantization_statusweightweight_scaleweight_zero_pointweight_g_idxF)xscale
zero_pointg_idxargsdtype)getattrweightsr   
COMPRESSEDr   r	   requires_gradr   torchint8data)r   schemestatusr	   r   r   r    r   j/home/ubuntu/vllm_env/lib/python3.10/site-packages/compressed_tensors/quantization/lifecycle/compressed.pyr       s.   


	)loggingr   1compressed_tensors.quantization.lifecycle.forwardr   ,compressed_tensors.quantization.quant_configr   torch.nnr   __all__	getLogger__name___LOGGERr   r   r   r   r   <module>   s   
