o
    oi                     @   s@   d dl mZ G dd deZG dd deZG dd deZdS )	    )Optimizer1Statec                       2   e Zd Z										d fd	d
	Z  ZS )Adagrad{Gz?r   绽|=    N   d   Tc                    s   d|kst d| d|kst d| d|ks!t d| |dkr)t d|dkr1t dt d||d|||||	|
| d	S )
a  
        Base Adagrad optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`, defaults to 1e-2):
                The learning rate.
            lr_decay (`int`, defaults to 0):
                The learning rate decay.
            weight_decay (`float`, defaults to 0.0):
                The weight decay value for the optimizer.
            initial_accumulator_value (`int`, defaults to 0):
                The initial momemtum values.
            eps (`float`, defaults to 1e-10):
                The epsilon value prevents division by zero in the optimizer.
            optim_bits (`int`, defaults to 32):
                The number of bits of the optimizer state.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            block_wise (`bool`, defaults to `True`):
                Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
                Invalid learning rate: Invalid weight_decay value: Invalid epsilon value: /Initial accumulator value != 0.0 not supported!Lr Decay != 0.0 not supported!adagradr
   r
   N
ValueErrorsuper__init__selfparamslrlr_decayweight_decayinitial_accumulator_valueeps
optim_bitsargsmin_8bit_sizepercentile_clipping
block_wise	__class__ N/home/ubuntu/.local/lib/python3.10/site-packages/bitsandbytes/optim/adagrad.pyr   	   .   )zAdagrad.__init__
r   r   r   r   r   r   Nr   r	   T__name__
__module____qualname__r   __classcell__r%   r%   r#   r&   r          r   c                       r   )Adagrad8bitr   r   r      Nr   r	   Tc                    s   d|kst d| d|kst d| d|ks!t d| |dkr)t d|dkr1t d|s5J t d||d||d	||	|
| d
S )a  
        8-bit Adagrad optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`, defaults to 1e-2):
                The learning rate.
            lr_decay (`int`, defaults to 0):
                The learning rate decay.
            weight_decay (`float`, defaults to 0.0):
                The weight decay value for the optimizer.
            initial_accumulator_value (`int`, defaults to 0):
                The initial momemtum values.
            eps (`float`, defaults to 1e-10):
                The epsilon value prevents division by zero in the optimizer.
            optim_bits (`int`, defaults to 8):
                The number of bits of the optimizer state.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            block_wise (`bool`, defaults to `True`):
                Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
        r
   r   r   r   r   r   r   r   r0   Nr   r   r#   r%   r&   r   L   s0   )zAdagrad8bit.__init__)
r   r   r   r   r   r0   Nr   r	   Tr)   r%   r%   r#   r&   r/   K   r.   r/   c                       r   )Adagrad32bitr   r   r   r   Nr   r	   Tc                    s   d|kst d| d|kst d| d|ks!t d| |dkr)t d|dkr1t dt d||d||d	||	|
| d
S )a  
        32-bit Adagrad optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`, defaults to 1e-2):
                The learning rate.
            lr_decay (`int`, defaults to 0):
                The learning rate decay.
            weight_decay (`float`, defaults to 0.0):
                The weight decay value for the optimizer.
            initial_accumulator_value (`int`, defaults to 0):
                The initial momemtum values.
            eps (`float`, defaults to 1e-10):
                The epsilon value prevents division by zero in the optimizer.
            optim_bits (`int`, defaults to 32):
                The number of bits of the optimizer state.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            block_wise (`bool`, defaults to `True`):
                Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
        r
   r   r   r   r   r   r   r   r   Nr   r   r#   r%   r&   r      r'   zAdagrad32bit.__init__r(   r)   r%   r%   r#   r&   r1      r.   r1   N)bitsandbytes.optim.optimizerr   r   r/   r1   r%   r%   r%   r&   <module>   s   CD