o
    i/                     @   s  U d dl Z d dlmZmZ d dlmZ d dlmZmZm	Z	 d dl
Z
d dlmZ ddlmZmZmZ ddlmZmZmZmZ dd	lmZ e eZG d
d dZdd Zedefi Zeed< G dd dee
j j!Z"G dd de"Z#G dd de"Z$G dd de"Z%dS )    N)ABCMetaabstractmethod)partial)AnyOptionalTuple)_fake_quantize_affine   )GranularityPerRow	PerTensor)MappingTypeZeroPointDomain_get_reduction_params"choose_qparams_affine_with_min_max)get_block_sizec                   @   s,   e Zd Zdd Zdd Zdd Zdd Zd	S )
_PartialWrapperc                 C   s
   || _ d S Np)selfr    r   Q/home/ubuntu/.local/lib/python3.10/site-packages/torchao/quantization/observer.py__init__      
z_PartialWrapper.__init__c                 O   s   | j |i |S r   r   )r   argskeywordsr   r   r   __call__    s   z_PartialWrapper.__call__c                 C   s
   | j  S r   )r   __repr__r   r   r   r   r   #   r   z_PartialWrapper.__repr__c                 O   s   t | g|R i |S r   )
_with_args)r   r   kwargsr   r   r   	with_args&   s   z_PartialWrapper.with_argsN)__name__
__module____qualname__r   r   r   r"   r   r   r   r   r      s
    r   c                 O   s   t t| g|R i |}|S )a  Wrapper that allows creation of class factories.

    This can be useful when there is a need to create classes with the same
    constructor arguments, but different instances.

    Example::

        >>> # xdoctest: +SKIP("Undefined vars")
        >>> Foo.with_args = classmethod(_with_args)
        >>> foo_builder = Foo.with_args(a=3, b=4).with_args(answer=42)
        >>> foo_instance1 = foo_builder()
        >>> foo_instance2 = foo_builder()
        >>> id(foo_instance1) == id(foo_instance2)
        False
    )r   r   )cls_or_selfr   r!   rr   r   r   r    *   s   r    ABCc                       s   e Zd ZdZeeZddddddejdfde	de
jdedee d	ee d
ee dee
j dee
j dededef fddZede
jde
jfddZedee
je
jf fddZ  ZS )AffineQuantizedObserverBasea  Observer module for affine quantization (https://github.com/pytorch/ao/tree/main/torchao/quantization#affine-quantization)

    Args:
      `granularity` and `block_size`: The granularity of the quantization,
        must specify at least one, if both are specified `block_size` takes precedence
        Current supported granularity type are `PerTensor` and `PerAxis`
      other args: please see `:class:torchao.dtypes.AffineQuantizedTensor`
    NTFmapping_typetarget_dtypegranularity	quant_min	quant_maxepsscale_dtypezero_point_dtypepreserve_zerozero_point_domainkeepdimc                    sp   t    |d usJ d|
d u rtd|| _|| _|| _|| _|| _|| _|| _	|| _
|	| _|
| _|| _d S )Ngranularity is Nonez/Please use ZeroPointDomain.NONE instead of None)superr   
ValueErrorr*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   )r   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   	__class__r   r   r   M   s   

z$AffineQuantizedObserverBase.__init__inputreturnc                 C      dS )z~forward function should take the input tensor
        and updates internal stats and return the original input Tensor
        Nr   r   r:   r   r   r   forwardk      z#AffineQuantizedObserverBase.forwardc                 C   r<   )zCalculate quantization parameter based on the stats attached to the observer module
        and returns a tuple of scale and zero_point Tensor
        Nr   r   r   r   r   calculate_qparamsr   r?   z-AffineQuantizedObserverBase.calculate_qparams)r#   r$   r%   __doc__classmethodr    r"   r   INTr   torchdtyper
   r   intfloatboolr   r   Tensorr>   r   r@   __classcell__r   r   r8   r   r)   A   sL    		
$r)   c                   @   s6   e Zd ZdejfddZdeejejf fddZdS )AffineQuantizedMinMaxObserverr:   c                 C   s$  |  dkr|S | }| jd usJ dt|j| j}t|| \}}||}tj	||| j
d}tj||| j
d}t| drFt| dsN|| _|| _|S | jj|jksbJ d| jj d|j | jj|jksvJ d| jj d	|j t| j|}t| j|}| j| | j| |S )
Nr   r5   dimr4   min_valmax_valz=Can't update existing min_val - shape mismatch, self.min_val:z != min_val:z=Can't update existing max_val - shape mismatch, self.max_val z != max_val:)numeldetachr,   r   shaper   sizeviewrD   aminr4   amaxhasattrrN   rO   minmaxcopy_)r   r:   input_detached
block_sizeshape_for_reductionreduction_dimsrN   rO   r   r   r   r>   {   s4   
z%AffineQuantizedMinMaxObserver.forwardr;   c                 C   P   t | dr
t | dsJ dt| j| j| jg | j| j| j| j| j	| j
| j| jS NrN   rO   zhExpecting the observer has min_val and max_val, please run the observer before calling calculate_qparamsrW   r   rN   rO   r*   r+   r-   r.   r/   r0   r1   r2   r3   r   r   r   r   r@      "   z/AffineQuantizedMinMaxObserver.calculate_qparamsN)r#   r$   r%   rD   rI   r>   r   r@   r   r   r   r   rK   z   s    rK   c                       s   e Zd ZdZddddddejddf	dedejde	de
e de
e d	e
e d
e
ej de
ej dedede
ej de
ej f fddZdddZdd Zdd Z  ZS )"AffineQuantizedFixedQParamObserverzO
    Observer that allows manual setting of fixed quantization parameters.
    NTr*   r+   r,   r-   r.   r/   r0   r1   r2   r3   scale
zero_pointc                    sh   t  |||||||||	|

 |stdg}|st|}| d|j|d | d|j|d d S )Nr	   rd   rE   re   )r6   r   rD   rI   
zeros_likeregister_bufferto)r   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   rd   re   r8   r   r   r      s$   
z+AffineQuantizedFixedQParamObserver.__init__c                 C   s2   |st |}|j| jd| _|j| jd| _d S )Nrf   )rD   rg   ri   r0   rd   r1   re   )r   rd   re   r   r   r   set_qparams   s   
z.AffineQuantizedFixedQParamObserver.set_qparamsc                 C   s   |S r   r   r=   r   r   r   r>      s   z*AffineQuantizedFixedQParamObserver.forwardc                 C   s   | j | jfS r   )rd   re   r   r   r   r   r@      s   z4AffineQuantizedFixedQParamObserver.calculate_qparamsr   )r#   r$   r%   rA   r   rC   r   rD   rE   r
   r   rF   rG   rH   rI   r   rj   r>   r@   rJ   r   r   r8   r   rc      sN    		

"rc   c                       s   e Zd ZdZddddddejddf	dedejde	d	e
e d
e
e de
e de
ej de
ej dedededef fddZdd Zdd Zdd Zdd Zdd Z  ZS )AffineQuantizedMSEObserverz
    Minimize quantization loss caused by outlier via linear search. More details can be found at https://arxiv.org/pdf/2209.13325
    NTd   Fr*   r+   r,   r-   r.   r/   r0   r1   r2   r3   stepsrun_oncec                    s4   t  |||||||||	|

 || _d| _|| _d S )NF)r6   r   rm   
calibratedrn   )r   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   rm   rn   r8   r   r   r      s   
z#AffineQuantizedMSEObserver.__init__c                 C   s>   ||   d}t|| \}}||}tj||ddS )N   FrL   )abspowr   rS   rT   rD   mean)r   predexpectr\   lossr]   r^   r   r   r   mse  s   
zAffineQuantizedMSEObserver.msec                 C   sn   t |j| j}t||| jg | j| j| j| j| j	| j
| j| j\}}t||||| j| j| j| j}| |||S r   )r   rR   r,   r   r*   r+   r-   r.   r/   r0   r1   r2   r3   r   rw   )r   xnew_minnew_maxr\   rd   re   x_qr   r   r   loss_fn  s4   
z"AffineQuantizedMSEObserver.loss_fnc                 C   s   |  dkr|S | }| jd usJ dt|j| j}t|| \}}||}tj	||dd}tj
||dd}t| |}t|d }	td| jd D ]*}
|| j |
 }| || |}t||	k | |}t||	k ||}t||	}	qQ||fS )Nr   r5   FrL   g    eAr	   )rP   rQ   r,   r   rR   r   rS   rT   rD   rU   rV   rY   rq   rg   rangerm   r|   whererX   )r   r:   r[   r\   r]   r^   rN   rO   	range_valoptimal_lossithrescurrent_lossr   r   r   line_search*  s(   
z&AffineQuantizedMSEObserver.line_searchc                 C   s(   | j r| js| |\| _| _d| _|S )NT)rn   ro   r   rN   rO   r=   r   r   r   r>   F  s   z"AffineQuantizedMSEObserver.forwardc                 C   r_   r`   ra   r   r   r   r   r@   M  rb   z,AffineQuantizedMSEObserver.calculate_qparams)r#   r$   r%   rA   r   rC   r   rD   rE   r
   r   rF   rG   rH   r   rw   r|   r   r>   r@   rJ   r   r   r8   r   rk      sR    		
rk   )&loggingabcr   r   	functoolsr   typingr   r   r   rD   %torchao.quantization.quant_primitivesr   r,   r
   r   r   quant_primitivesr   r   r   r   utilsr   	getLoggerr#   loggerr   r    objectr(   __annotations__nnModuler)   rK   rc   rk   r   r   r   r   <module>   s"   

944