o
    TÃiŠ  ã                   @   s‚   d dl Z d dl mZ d dlZG dd„ dejƒZG dd„ de jjƒZG dd„ de jjƒZG d	d
„ d
e jjƒZG dd„ de jjƒZdS )é    N)Úautogradc                   @   s8   e Zd ZdZedejdedefdd„ƒZ	edd„ ƒZ
d	S )
ÚTopKBinarizera   
    Top-k Binarizer.
    Computes a binary mask M from a real value matrix S such that `M_{i,j} = 1` if and only if `S_{i,j}`
    is among the k% highest values of S.
    Implementation is inspired from:
        https://github.com/yaozhewei/MLPruning
    ÚinputsÚ	thresholdÚsigmoidc           	      C   s|   |r	t  |¡ ¡ }|| _| ¡ }| ¡ jdd\}}t || ¡  ¡}| ¡ }d|||d… < d||d|… < |  	|¡ |S )ao  
        Args:
            inputs (`torch.FloatTensor`)
                The input matrix from which the binarizer computes the binary mask.
            threshold (`float`)
                The percentage of weights to keep (the rest is pruned).
                `threshold` is a float between 0 and 1.
            sigmoid (`bool`)
                Whether to apply a sigmoid on the threshold
        Returns:
            mask (`torch.FloatTensor`)
                Binary matrix of the same size as `inputs` acting as a mask (1 - the associated weight is
                retained, 0 - the associated weight is pruned).
        T)Ú
descendingg        Ng      ð?)
Útorchr   ÚitemÚcloneÚflattenÚsortÚmathÚceilÚnumelÚsave_for_backward)	Úctxr   r   r   ÚmaskÚ_ÚidxÚjÚflat_out© r   úO/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/compression/utils.pyÚforward   s   
zTopKBinarizer.forwardc                 C   s8   | j \}| jr| ¡ ||  ¡  d¡d fS | ¡ d d fS )Néÿÿÿÿ)Úsaved_tensorsr   r
   ÚsumÚview)r   Ú
gradOutputr   r   r   r   Úbackward5   s   zTopKBinarizer.backwardN)Ú__name__Ú
__module__Ú__qualname__Ú__doc__Ústaticmethodr   ÚtensorÚfloatÚboolr   r   r   r   r   r   r      s     r   c                   @   ó*   e Zd ZdZeddd„ƒZedd„ ƒZdS )	ÚSymQuantizerz 
    Symmetric quantization
    Né   c                 C   sÆ   |du r|du s|dur|dur|dksJ ‚d| }|j }|du r6| |d¡}tjt |¡dd |d¡}nt | ¡ |¡ d¡}d| | }	||	  ¡  | d |d d ¡|	 }
|
 |¡ 	¡ }
|
S )áü  
        Args:
            inputs (`torch.FloatTensor`)
                The input which needs to be quantized
            num_bits (int, >=4)
                Number of bits to use for quantization
            min_value/max_value (torch.FloatTensor)
                Used for static activation quantization
            num_groups (int)
                How many groups to partition the quantization into
        Returns:
            quantized_input (`torch.FloatTensor`)
                Quantized input
        Nr*   é   r   ©Údim)
ÚshapeÚreshaper   ÚamaxÚabsr   ÚmaxÚroundÚclampÚ
contiguous)r   ÚinputÚnum_bitsÚ	min_valueÚ	max_valueÚ
num_groupsÚq_rangeÚinput_shapeÚ	max_inputÚscaleÚoutputr   r   r   r   C   s    &zSymQuantizer.forwardc                 C   ó   |  ¡ }|d d d d fS ©N©r
   ©r   Úgrad_outputÚ
grad_inputr   r   r   r   b   ó   zSymQuantizer.backward©NNr*   ©r    r!   r"   r#   r$   r   r   r   r   r   r   r)   >   s    r)   c                   @   r(   )	ÚAsymQuantizerz!
    Asymmetric quantization
    Nr*   c                 C   s¼   |du r|du s|dur|dur|dksJ ‚d| }|j }|du r5| |d¡}|jddd}|jddd}|| | }||  ¡ | }	||	 |  ¡  d|d ¡| |	 }
|
 |¡ ¡ }
|
S )r+   Nr*   r,   r   T)r.   Úkeepdimr   )r/   r0   Úaminr1   r4   r5   r6   )r   r7   r8   r9   r:   r;   r<   r=   r?   Ú
zero_pointr@   r   r   r   r   m   s    $zAsymQuantizer.forwardc                 C   rA   rB   rC   rD   r   r   r   r   Ž   rG   zAsymQuantizer.backwardrH   rI   r   r   r   r   rJ   h   s     rJ   c                   @   r(   )	ÚTernaryQuantizerz
    Ternary quantization
    Nr*   c                 C   sÖ   |du r|du s
J ‚|  |d¡}|jd }|jddd |¡}d|  dd¡}	||	k | ¡ ¡}
||	 k  | ¡ ¡}| ¡ |	k | ¡ ¡}||  ¡ jdd|jdd  dd¡}||
 ||  }|  |j¡ ¡ }|S )á   
        Args:
            inputs (`torch.FloatTensor`)
                The input which needs to be quantized
            num_bits (int)
                Dummy variable
            min_value/max_value (torch.FloatTensor)
                Used for static activation quantization; for now they are dummy variable
            num_groups (int)
                How many groups to partition the quantization into
        Returns:
            quantized_input (`torch.FloatTensor`)
                Quantized input
        Nr   r*   )Úpr.   gffffffæ?r-   )	r0   r/   ÚnormÚdivr   Útyper2   r   r6   )r   r7   r8   r9   r:   r;   Ú
input_flatÚnÚmÚthresÚposÚnegr   Úalphar@   r   r   r   r   ™   s   
(zTernaryQuantizer.forwardc                 C   rA   rB   rC   rD   r   r   r   r   ·   rG   zTernaryQuantizer.backwardrH   rI   r   r   r   r   rN   ”   s    rN   c                   @   r(   )	ÚBinaryQuantizerz
    Binary quantization
    Nr*   c           
      C   sb   |du r|du s
J ‚|  |d¡}|jd }|jdddd |¡}| ¡  |¡}	|	  |j¡ ¡ }	|	S )rO   Nr   r*   T)rP   r.   rK   )r0   r/   rQ   rR   ÚsignÚmulr6   )
r   r7   r8   r9   r:   r;   rT   rU   rV   r@   r   r   r   r   Â   s   
zBinaryQuantizer.forwardc                 C   rA   rB   rC   rD   r   r   r   r   Û   rG   zBinaryQuantizer.backwardrH   rI   r   r   r   r   r[   ½   s    r[   )	r   r   r   ÚFunctionr   r)   rJ   rN   r[   r   r   r   r   Ú<module>   s   3*,)