o
    ‚o™i=  ã                   @   s@   d dl mZ G dd„ deƒZG dd„ deƒZG dd„ deƒZdS )	é    )ÚOptimizer1Statec                       s0   e Zd Z									d
‡ fdd	„	Z‡  ZS )ÚSGDr   Fé    Né   éd   Tc                    s8   |dkrt dƒ‚tƒ  d||||fd||||	|
|¡ dS )a•  
        Base SGD optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`):
                The learning rate.
            momentum (`float`, defaults to 0):
                The momentum value speeds up the optimizer by taking bigger steps.
            dampening (`float`, defaults to 0):
                The dampening value reduces the momentum of the optimizer.
            weight_decay (`float`, defaults to 0.0):
                The weight decay value for the optimizer.
            nesterov (`bool`, defaults to `False`):
                Whether to use Nesterov momentum.
            optim_bits (`int`, defaults to 32):
                The number of bits of the optimizer state.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            block_wise (`bool`, defaults to `True`):
                Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
        r   ú&SGD without momentum is not supported!Úmomentumç        N©ÚNotImplementedErrorÚsuperÚ__init__)ÚselfÚparamsÚlrr   Ú	dampeningÚweight_decayÚnesterovÚ
optim_bitsÚargsÚmin_8bit_sizeÚpercentile_clippingÚ
block_wise©Ú	__class__© úJ/home/ubuntu/.local/lib/python3.10/site-packages/bitsandbytes/optim/sgd.pyr   	   s   )õzSGD.__init__)	r   r   r   Fr   Nr   r   T©Ú__name__Ú
__module__Ú__qualname__r   Ú__classcell__r   r   r   r   r      s    ôr   c                       ó.   e Zd Z								d	‡ fdd„	Z‡  ZS )
ÚSGD8bitr   FNr   r   Tc                    ó8   |dkrt dƒ‚tƒ  d||||fd|d|||	|
¡ dS )a+  
        8-bit SGD optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`):
                The learning rate.
            momentum (`float`, defaults to 0):
                The momentum value speeds up the optimizer by taking bigger steps.
            dampening (`float`, defaults to 0):
                The dampening value reduces the momentum of the optimizer.
            weight_decay (`float`, defaults to 0.0):
                The weight decay value for the optimizer.
            nesterov (`bool`, defaults to `False`):
                Whether to use Nesterov momentum.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            block_wise (`bool`, defaults to `True`):
                Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
        r   r   r   r	   é   Nr
   ©r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   D   ó   &õzSGD8bit.__init__©r   r   r   FNr   r   Tr   r   r   r   r   r#   C   ó    õr#   c                       r"   )
ÚSGD32bitr   FNr   r   Tc                    r$   )a,  
        32-bit SGD optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`):
                The learning rate.
            momentum (`float`, defaults to 0):
                The momentum value speeds up the optimizer by taking bigger steps.
            dampening (`float`, defaults to 0):
                The dampening value reduces the momentum of the optimizer.
            weight_decay (`float`, defaults to 0.0):
                The weight decay value for the optimizer.
            nesterov (`bool`, defaults to `False`):
                Whether to use Nesterov momentum.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            block_wise (`bool`, defaults to `True`):
                Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
        r   r   r   r	   r   Nr
   r&   r   r   r   r   |   r'   zSGD32bit.__init__r(   r   r   r   r   r   r*   {   r)   r*   N)Úbitsandbytes.optim.optimizerr   r   r#   r*   r   r   r   r   Ú<module>   s   ;8