o
    3wi]-                     @   sp   d dl mZ G dd deZG dd deZG dd deZG dd	 d	eZG d
d deZG dd deZdS )    )Optimizer1Statec                       s0   e Zd Z										d fd
d	Z  ZS )Lion-C6?g?gGz?r       N   d   TFc                    s(   t  jd|||d||||||	|
d dS )aj  
        Base Lion optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`, defaults to 1e-4):
                The learning rate.
            betas (`tuple(float, float)`, defaults to (0.9, 0.999)):
                The beta values are the decay rates of the first and second-order moment of the optimizer.
            weight_decay (`float`, defaults to 0):
                The weight decay value for the optimizer.
            optim_bits (`int`, defaults to 32):
                The number of bits of the optimizer state.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            block_wise (`bool`, defaults to `True`):
                Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
            is_paged (`bool`, defaults to `False`):
                Whether the optimizer is a paged optimizer or not.
        lion        is_pagedNsuper__init__)selfparamslrbetasweight_decay
optim_bitsargsmin_8bit_sizepercentile_clipping
block_wiser   	__class__ T/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/bitsandbytes/optim/lion.pyr   	   s   &
zLion.__init__)	r   r   r   r   Nr   r   TF__name__
__module____qualname__r   __classcell__r   r   r   r   r      s    r   c                       .   e Zd Z								d fd	d
	Z  ZS )Lion8bitr   r   r   Nr   r   TFc
           
         (   t  jd|||d|d|||||	d dS )a   
        8-bit Lion optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`, defaults to 1e-4):
                The learning rate.
            betas (`tuple(float, float)`, defaults to (0.9, 0.999)):
                The beta values are the decay rates of the first and second-order moment of the optimizer.
            weight_decay (`float`, defaults to 0):
                The weight decay value for the optimizer.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            block_wise (`bool`, defaults to `True`):
                Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
            is_paged (`bool`, defaults to `False`):
                Whether the optimizer is a paged optimizer or not.
        r	   r
      r   Nr   
r   r   r   r   r   r   r   r   r   r   r   r   r   r   @      #
zLion8bit.__init__r   r   r   Nr   r   TFr   r   r   r   r   r$   ?       r$   c                       r#   )	Lion32bitr   r   r   Nr   r   TFc
           
         r%   )a  
        32-bit Lion optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`, defaults to 1e-4):
                The learning rate.
            betas (`tuple(float, float)`, defaults to (0.9, 0.999)):
                The beta values are the decay rates of the first and second-order moment of the optimizer.
            weight_decay (`float`, defaults to 0):
                The weight decay value for the optimizer.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            block_wise (`bool`, defaults to `True`):
                Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
            is_paged (`bool`, defaults to `False`):
                Whether the optimizer is a paged optimizer or not.
        r	   r
   r   r   Nr   r'   r   r   r   r   t   r(   zLion32bit.__init__r)   r   r   r   r   r   r+   s   r*   r+   c                       r#   )	PagedLionr   r   r   r   Nr   r   Tc
           
         s(   t  jd|||d||||||	dd dS )a  
        Paged Lion optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`, defaults to 1e-4):
                The learning rate.
            betas (`tuple(float, float)`, defaults to (0.9, 0.999)):
                The beta values are the decay rates of the first and second-order moment of the optimizer.
            weight_decay (`float`, defaults to 0):
                The weight decay value for the optimizer.
            optim_bits (`int`, defaults to 32):
                The number of bits of the optimizer state.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            block_wise (`bool`, defaults to `True`):
                Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
        r	   r
   Tr   Nr   )
r   r   r   r   r   r   r   r   r   r   r   r   r   r      r(   zPagedLion.__init__)r   r   r   r   Nr   r   Tr   r   r   r   r   r,      r*   r,   c                       ,   e Zd Z							d
 fdd		Z  ZS )PagedLion8bitr   r   r   Nr   r   Tc	           	         (   t  jd|||d|d||||dd dS )a  
        Paged 8-bit Lion optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`, defaults to 1e-4):
                The learning rate.
            betas (`tuple(float, float)`, defaults to (0.9, 0.999)):
                The beta values are the decay rates of the first and second-order moment of the optimizer.
            weight_decay (`float`, defaults to 0):
                The weight decay value for the optimizer.
            optim_bits (`int`, defaults to 32):
                The number of bits of the optimizer state.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            block_wise (`bool`, defaults to `True`):
                Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
        r	   r
   r&   Tr   Nr   	r   r   r   r   r   r   r   r   r   r   r   r   r         "
zPagedLion8bit.__init__r   r   r   Nr   r   Tr   r   r   r   r   r.          r.   c                       r-   )PagedLion32bitr   r   r   Nr   r   Tc	           	         r/   )a  
        Paged 32-bit Lion optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`, defaults to 1e-4):
                The learning rate.
            betas (`tuple(float, float)`, defaults to (0.9, 0.999)):
                The beta values are the decay rates of the first and second-order moment of the optimizer.
            weight_decay (`float`, defaults to 0):
                The weight decay value for the optimizer.
            optim_bits (`int`, defaults to 32):
                The number of bits of the optimizer state.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            block_wise (`bool`, defaults to `True`):
                Whether to independently quantize each block of tensors to reduce outlier effects and improve stability.
        r	   r
   r   Tr   Nr   r0   r   r   r   r     r1   zPagedLion32bit.__init__r2   r   r   r   r   r   r4     r3   r4   N)bitsandbytes.optim.optimizerr   r   r$   r+   r,   r.   r4   r   r   r   r   <module>   s   74443