o
    ‚o™i®$  ã                   @   sd   d dl Z d dlmZ d dlmZ G dd„ deƒZG dd„ deƒZG dd	„ d	eƒZG d
d„ deƒZdS )é    N)Ú	Optimizer)ÚOptimizer1Statec                       s0   e Zd Z									d
‡ fdd	„	Z‡  ZS )ÚLARSr   Fé    Né   éd   ç{®Gáz”?c                    s<   |dkrt dƒ‚tƒ jd||||fd||||	|
|dd dS )aG  
        Base LARS optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`):
                The learning rate.
            momentum (`float`, defaults to 0):
                The momentum value speeds up the optimizer by taking bigger steps.
            dampening (`float`, defaults to 0):
                The dampening value reduces the momentum of the optimizer.
            weight_decay (`float`, defaults to 1e-2):
                The weight decay value for the optimizer.
            nesterov (`bool`, defaults to `False`):
                Whether to use Nesterov momentum.
            optim_bits (`int`, defaults to 32):
                The number of bits of the optimizer state.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            max_unorm (`float`, defaults to 0.02):
                The maximum gradient norm.
        r   ú'LARS without momentum is not supported!Úlarsç        F©Ú	max_unormÚ
block_wiseN©ÚNotImplementedErrorÚsuperÚ__init__)ÚselfÚparamsÚlrÚmomentumÚ	dampeningÚweight_decayÚnesterovÚ
optim_bitsÚargsÚmin_8bit_sizeÚpercentile_clippingr   ©Ú	__class__© úK/home/ubuntu/.local/lib/python3.10/site-packages/bitsandbytes/optim/lars.pyr      s    )
ôzLARS.__init__)	r   r   r   Fr   Nr   r   r   ©Ú__name__Ú
__module__Ú__qualname__r   Ú__classcell__r    r    r   r!   r      s    ôr   c                       ó.   e Zd Z								d	‡ fdd„	Z‡  ZS )
ÚLARS8bitr   FNr   r   r   c                    ó<   |dkrt dƒ‚tƒ jd||||fd|d|||	|
dd dS )	aÝ  
        8-bit LARS optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`):
                The learning rate.
            momentum (`float`, defaults to 0):
                The momentum value speeds up the optimizer by taking bigger steps.
            dampening (`float`, defaults to 0):
                The dampening value reduces the momentum of the optimizer.
            weight_decay (`float`, defaults to 1e-2):
                The weight decay value for the optimizer.
            nesterov (`bool`, defaults to `False`):
                Whether to use Nesterov momentum.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            max_unorm (`float`, defaults to 0.02):
                The maximum gradient norm.
        r   r	   r
   r   é   Fr   Nr   ©r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r   H   ó    &
ôzLARS8bit.__init__©r   r   r   FNr   r   r   r"   r    r    r   r!   r(   G   ó    õr(   c                       r'   )
Ú	LARS32bitr   FNr   r   r   c                    r)   )	aÞ  
        32-bit LARS optimizer.

        Arguments:
            params (`torch.tensor`):
                The input parameters to optimize.
            lr (`float`):
                The learning rate.
            momentum (`float`, defaults to 0):
                The momentum value speeds up the optimizer by taking bigger steps.
            dampening (`float`, defaults to 0):
                The dampening value reduces the momentum of the optimizer.
            weight_decay (`float`, defaults to 1e-2):
                The weight decay value for the optimizer.
            nesterov (`bool`, defaults to `False`):
                Whether to use Nesterov momentum.
            args (`object`, defaults to `None`):
                An object with additional arguments.
            min_8bit_size (`int`, defaults to 4096):
                The minimum number of elements of the parameter tensors for 8-bit optimization.
            percentile_clipping (`int`, defaults to 100):
                Adapts clipping threshold automatically by tracking the last 100 gradient norms and clipping the gradient at a certain percentile to improve stability.
            max_unorm (`float`, defaults to 0.02):
                The maximum gradient norm.
        r   r	   r
   r   r   Fr   Nr   r+   r   r    r!   r      r,   zLARS32bit.__init__r-   r"   r    r    r   r!   r/   €   r.   r/   c                       sH   e Zd Z						d‡ fdd„	Z‡ fdd„Ze ¡ dd
d„ƒZ‡  ZS )ÚPytorchLARSç{®Gáz„?r   Fr   c           	         s„   |dk rt d|› ƒ‚|dk rt d|› ƒ‚|dk r!t d|› ƒ‚t||||||d}|r9|dks5|dkr9t dƒ‚tƒ  ||¡ d S )Nr   zInvalid learning rate: zInvalid momentum value: zInvalid weight_decay value: )r   r   r   r   r   r   r   z8Nesterov momentum requires a momentum and zero dampening)Ú
ValueErrorÚdictr   r   )	r   r   r   r   r   r   r   r   Údefaultsr   r    r!   r   º   s"   
úzPytorchLARS.__init__c                    s(   t ƒ  |¡ | jD ]}| dd¡ q	d S )Nr   F)r   Ú__setstate__Úparam_groupsÚ
setdefault)r   ÚstateÚgroupr   r    r!   r5   ×   s   
ÿzPytorchLARS.__setstate__Nc                 C   sˆ  d}|durt  ¡  |ƒ }W d  ƒ n1 sw   Y  | jD ]¡}g }g }g }|d }|d }|d }	|d }
|d }|d }|d D ]|}|jdu rLqD| j| }|j}|d	kr_|j||d
}|d	kr| dd¡}|du ryt  |¡ ¡ }||d< n| 	|¡j
|d|	 d
 |
rŽ|||  }n|}d}|dkr¶|jt jksžJ ‚t  | ¡ ¡}t  |¡}||| kr¶|| | }|j
|| | d
 qDq |S )z±Performs a single optimization step.

        Args:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        Nr   r   r   r   r   r   r   r   )ÚalphaÚmomentum_bufferé   g      ð?r   )ÚtorchÚenable_gradr6   Úgradr8   ÚaddÚgetÚcloneÚdetachÚmul_Úadd_ÚdtypeÚfloat32Únorm)r   ÚclosureÚlossr9   Úparams_with_gradÚd_p_listÚmomentum_buffer_listr   r   r   r   r   r   Úpr8   Úd_pÚbufÚupdateÚupdate_scaleÚpnormÚunormr    r    r!   ÚstepÜ   sR   
ÿ




á!zPytorchLARS.step)r1   r   r   r   Fr   )N)	r#   r$   r%   r   r5   r=   Úno_gradrU   r&   r    r    r   r!   r0   ¹   s    ør0   )	r=   Útorch.optimr   Úbitsandbytes.optim.optimizerr   r   r(   r/   r0   r    r    r    r!   Ú<module>   s   <99