o
    3wÖij  ã                   @   s@   d dl mZ G dd„ deƒZG dd„ deƒZG dd„ deƒZdS )	é    )ÚOptimizer1Statec                       s4   e Zd Z												
d‡ fdd„	Z‡  ZS )ÚRMSpropç{®Gáz„?ç®Gáz®ï?ç:Œ0âŽyE>r   Fé    Né   éd   Tc                    sD   |dkrt dƒ‚|rt dƒ‚tƒ  d||||f||||	|
||¡ dS )a‘  
        Base RMSprop optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`, defaults to 1e-2):
                The learning rate.
            alpha (`float`, defaults to 0.99):
                The alpha value is the decay rate of the squared gradients of the optimizer.
            eps (`float`, defaults to 1e-8):
                The epsilon value prevents division by zero in the optimizer.
            weight_decay (`float`, defaults to 0.0):
                The weight decay value for the optimizer.
            momentum (`float`, defaults to 0):
                The momentum value speeds up the optimizer by taking bigger steps.
            centered (`bool`, defaults to `False`):
                Whether the gradients are normalized by the variance. If `True`, it can help training at the expense of additional compute.
            optim_bits (`int`, defaults to 32):
                The number of bits of the optimizer state.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            block_wise (`bool`, defaults to `True`):
                Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
        r   ú)RMSprop with alpha==0.0 is not supported!ú"Centered RMSprop is not supported!ÚrmspropN©ÚNotImplementedErrorÚsuperÚ__init__)ÚselfÚparamsÚlrÚalphaÚepsÚweight_decayÚmomentumÚcenteredÚ
optim_bitsÚargsÚmin_8bit_sizeÚpercentile_clippingÚ
block_wise©Ú	__class__© úW/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/bitsandbytes/optim/rmsprop.pyr   	   ó"   ,õzRMSprop.__init__)r   r   r   r   r   Fr   Nr   r	   T©Ú__name__Ú
__module__Ú__qualname__r   Ú__classcell__r    r    r   r!   r      s    ór   c                       ó2   e Zd Z											d‡ fd
d„	Z‡  ZS )ÚRMSprop8bitr   r   r   r   FNr   r	   Tc                    óD   |dkrt dƒ‚|rt dƒ‚tƒ  d||||f||d||	|
|¡ dS )a’  
        8-bit RMSprop optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`, defaults to 1e-2):
                The learning rate.
            alpha (`float`, defaults to 0.99):
                The alpha value is the decay rate of the squared gradients of the optimizer.
            eps (`float`, defaults to 1e-8):
                The epsilon value prevents division by zero in the optimizer.
            weight_decay (`float`, defaults to 0.0):
                The weight decay value for the optimizer.
            momentum (`float`, defaults to 0):
                The momentum value speeds up the optimizer by taking bigger steps.
            centered (`bool`, defaults to `False`):
                Whether the gradients are normalized by the variance. If `True`, it can help training at the expense of additional compute.
            optim_bits (`int`, defaults to 32):
                The number of bits of the optimizer state.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            block_wise (`bool`, defaults to `True`):
                Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
        r   r
   r   r   é   Nr   ©r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r   I   s"   +õzRMSprop8bit.__init__©
r   r   r   r   r   FNr   r	   Tr#   r    r    r   r!   r)   H   ó    ôr)   c                       r(   )ÚRMSprop32bitr   r   r   r   FNr   r	   Tc                    r*   )a“  
        32-bit RMSprop optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`, defaults to 1e-2):
                The learning rate.
            alpha (`float`, defaults to 0.99):
                The alpha value is the decay rate of the squared gradients of the optimizer.
            eps (`float`, defaults to 1e-8):
                The epsilon value prevents division by zero in the optimizer.
            weight_decay (`float`, defaults to 0.0):
                The weight decay value for the optimizer.
            momentum (`float`, defaults to 0):
                The momentum value speeds up the optimizer by taking bigger steps.
            centered (`bool`, defaults to `False`):
                Whether the gradients are normalized by the variance. If `True`, it can help training at the expense of additional compute.
            optim_bits (`int`, defaults to 32):
                The number of bits of the optimizer state.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            block_wise (`bool`, defaults to `True`):
                Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
        r   r
   r   r   r   Nr   r,   r   r    r!   r   ˆ   r"   zRMSprop32bit.__init__r-   r#   r    r    r   r!   r/   ‡   r.   r/   N)Úbitsandbytes.optim.optimizerr   r   r)   r/   r    r    r    r!   Ú<module>   s   @?