o
    i%D                     @   s  d dl mZ d dlmZ d dlmZ d dlZd dlmZm	Z	m
Z
mZ d dlmZ d dlmZ d dlmZmZ d d	lmZ g d
Ze 			d.dejdejdejde	deej deej deej dejfddZe 					d/dejdejdeej dee	 deej deej deej dejfddZe 		d0dejdejdejde	deej deej dejfddZe 					d1dejdejdejde	deej deej dededeej dejfddZded efd!d"Zded#ejd$edd%dejf
d&d'Z e 		d0dejdejdejd(ejd)ejde	deej deej dejfd*d+Z!e 			d.dejdejdejdeej deej dejfd,d-Z"dS )2    )wraps)ceil)OptionalN)DynamicTypeQuantizationArgsQuantizationStrategyround_to_quantized_type_args)QuantizationStatus)QuantizationScheme)calculate_rangecompute_dynamic_scales_and_zp)Module)quantize
dequantizefake_quantizewrap_module_forward_quantizedforward_quantizexscale
zero_pointargsdtypeg_idxglobal_scalereturnc                 C   s   t | ||||dd||d	S )a  
    Quantize the input tensor x using the QuantizationStrategy specified in args.
    Quantization can be done per tensor, channel, token or group. For group
    quantization, the group_size must be divisible by the column size. The input scale
    and zero_points are reshaped to support vectorization (Assumes 1 is the
    channel dimension)

    :param x: Input tensor
    :param scale: scale tensor
    :param zero_point: zero point tensor
    :param args: quantization args dictating how to quantize x
    :param dtype: optional dtype to cast the quantized output to
    :param g_idx: optional mapping from column index to group index
    :param global_scale: optional constant to scale the quantization scale during QDQ
    :return: fake quantized tensor
    TF)	r   r   r   r   r   do_quantizedo_dequantizer   r   _process_quantization)r   r   r   r   r   r   r    r   e/home/ubuntu/.local/lib/python3.10/site-packages/compressed_tensors/quantization/lifecycle/forward.pyr   ,   s   r   x_qc                 C   s  |du ry|j dks|j dkrttjd}nd|j dkrp|jd dkr(ttjd}nQ|jd dks9|jd | jd krMt| jd |jd  }ttj|d}n,| jd | jd }}	||jd  }
|	|jd  }ttj|
|gd	}n	t	d
|j  d|du r|j
}t| |||dd|||d	S )a?  
    Dequantize a quantized input tensor x_q based on the strategy specified in args. If
    args is not provided, the strategy will be inferred.

    :param x: quantized input tensor
    :param scale: scale tensor
    :param zero_point: zero point tensor
    :param args: quantization args used to quantize x_q
    :param dtype: optional dtype to cast the dequantized output to
    :param g_idx: optional mapping from column index to group index
    :param global_scale: optional constant to scale the quantization scale during QDQ
    :return: dequantized float tensor
    Nr      )strategy   )r#   
group_size)r#   block_structurez8Could not infer a quantization strategy from scale with z* dimmensions. Expected 0 or 2 dimmensions.FT)	r   r   r   r   r   r   r   r   r   )ndimr   r   TENSORshapeCHANNELintGROUPBLOCK
ValueErrorr   r   )r!   r   r   r   r   r   r   r%   rowscolsblock_heightblock_widthr   r   r    r   T   sD   
"r   c              
   C   s   t | |||dd||dS )a  
    Fake quantize the input tensor x by quantizing then dequantizing with
    the QuantizationStrategy specified in args. Quantization can be done per tensor,
    channel, token or group. For group quantization, the group_size must be divisible
    by the column size. The input scale  and zero_points are reshaped to support
    vectorization (Assumes 1 is the channel dimension)

    :param x: Input tensor
    :param scale: scale tensor
    :param zero_point: zero point tensor
    :param args: quantization args dictating how to quantize x
    :param g_idx: optional mapping from column index to group index
    :param global_scale: optional constant to scale the quantization scale during QDQ
    :return: fake quantized tensor
    T)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r      s   r   Tr   r   c	           !   
   C   sB  t || j\}	}
|j}|jtjkr| j}| jd | jd }}|j\}}|| dkr5td| d| d|| dkrFtd| d| d|| }|| }| 	||||
d	d
}|dd}|d urn|ddnd }|r~t||||	|
|||d}|rt||||d}|
d	d
	|}|S |jtjtjfv r|d ur|n| j}t| |}|jd }|jd
k r|d	}|d ur|d	nd }|jd
k s||kr|| dkrtd| d| |d u pd|v }|rtt|| }tj|f|tjd}ntj|dd\}}|t| }t|}| d|} t| jd | |f}| d|} |rFt| |d|d ur=|dnd |||	|
|d}|re|rN|n| }t||d|d ur`|dnd |d}|jdd}||}|s~t|} |d| }|S |rt| |||	|
|||d}|rt|r|n| |||d}|S )Nr&   r'   r   zTensor height z" is not divisible by block_height z-. Block quantization requires exact division.zTensor width z! is not divisible by block_width r"   r$   )r   r   r   q_minq_maxr   r   r   )r!   r   r   r   z=tensor column shape must be divisble by the given group_size z	 but got )r   T)return_counts)r   r   r   r   r   r6   r7   r   )	start_dim)r   r   r   )r   devicer%   r#   r   r/   r+   r(   r0   reshape	transpose	unsqueeze	_quantize_dequantizer.   TENSOR_GROUPr   torch
zeros_liketor)   r-   r   fulluniqueargsortindex_select	unflattenflatten)!r   r   r   r   r   r   r   r   r   r6   r7   r%   original_shaper1   r2   r3   r4   num_rows_blocksnum_cols_blocksx_blockssbzboutputoutput_dtypecolumnsis_column_order
num_groupsgroup_sizesgroup_indicespermreshaped_dimsinputinv_permr   r   r    r      s   

]







r   moduleschemec                    sT   t jdrjj njj t  fdd}|j}td| d S )N__func__c                    s   t dds j|i |S |d }jtjk}jd ur)t|dj}jd urA|sA| j	j
 }t| j	dj| j	_
 j|g|dd  R i |}jd ur`|s`|| j	_
jd uryjtjkrqjjsq|S t|dj}|S )Nquantization_enabledTr   rY   weightr"   rP   )getattr__get__	__class__quantization_statusr	   
COMPRESSEDinput_activationsr   weightsr_   datacloneoutput_activationsCALIBRATIONdynamic)selfr   kwargsinput_
compressedunquantized_weightrP   forward_func_origr[   r\   r   r    wrapped_forwardl  s:   



z6wrap_module_forward_quantized.<locals>.wrapped_forwardforward)hasattrrt   r]   funcr   ra   rb   setattr)r[   r\   rs   bound_wrapped_forwardr   rq   r    r   d  s   
,r   value	base_namer   c                 C   s   | j tjkr|dkr|S | dkr|S t| dd }t| | dd }|jdtjfv r6t||| |d\}}nt| | d}t| | dd }t	||||||d	S )
Nr_   r   weight_g_idx_global_scaleT)ry   r   r[   r   _scale_zero_pointr5   )
rc   r	   rd   numelr`   rk   r   LOCALr   r   )r[   ry   rz   r   r   r   r   r   r   r   r    r     s*   r   r6   r7   c           
      C   sV   |d ur|| }| | }|d ur|| | j7 }t||||d}	|d ur)|	 |}	|	S )N)tensorr   minmax)rC   r   r   )
r   r   r   r6   r7   r   r   r   scaledquantized_valuer   r   r    r>     s   
r>   c                 C   sR   |d ur|| }|  |j}|d ur|| |j }|| }|d ur'| |}|S )N)rC   r   )r!   r   r   r   r   dequant_valuer   r   r    r?     s   
r?   )NNN)NNNNN)NN)NNTTN)#	functoolsr   mathr   typingr   rA   *compressed_tensors.quantization.quant_argsr   r   r   r   ,compressed_tensors.quantization.quant_configr	   ,compressed_tensors.quantization.quant_schemer
   %compressed_tensors.quantization.utilsr   r   torch.nnr   __all__no_gradTensorr   r   r   r   boolr   r   strr   r>   r?   r   r   r   r    <module>   s.  	'G#	
 $:
(	 