import math

import torch
from torch.optim.optimizer import Optimizer

from .types import Betas2, OptFloat, OptLossClosure, Params, State

__all__ = ('AdaBound',)


class AdaBound(Optimizer):
    r"""Implements AdaBound algorithm.

    It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of
    Learning Rate`__.

    Arguments:
        params: iterable of parameters to optimize or dicts defining
            parameter groups
        lr: learning rate (default: 1e-3)
        betas: coefficients used for computing running averages of gradient
            and its square (default: (0.9, 0.999))
        final_lr: final (SGD) learning rate (default: 0.1)
        gamma: convergence speed of the bound functions
            (default: 1e-3)
        eps: term added to the denominator to improve numerical stability
            (default: 1e-8)
        weight_decay: weight decay (L2 penalty) (default: 0)
        amsbound: whether to use the AMSBound variant of this algorithm

    Example:
        >>> import torch_optimizer as optim
        >>> optimizer = optim.AdaBound(model.parameters(), lr=0.1)
        >>> optimizer.zero_grad()
        >>> loss_fn(model(input), target).backward()
        >>> optimizer.step()

    __ https://arxiv.org/abs/1902.09843

    Note:
        Reference code: https://github.com/Luolc/AdaBound
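
        The step size applied to each parameter is the Adam step size
        clipped to dynamic bounds (as implemented in :meth:`step`):

        .. math::

            \eta_l(t) = \text{final\_lr}
                \left(1 - \frac{1}{\gamma t + 1}\right), \qquad
            \eta_u(t) = \text{final\_lr}
                \left(1 + \frac{1}{\gamma t}\right),

        where :math:`t` is the step count. Both bounds converge to
        ``final_lr``, so updates move smoothly from Adam-like to SGD-like.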
    """

    def __init__(
        self,
        params: Params,
        lr: float = 1e-3,
        betas: Betas2 = (0.9, 0.999),
        final_lr: float = 0.1,
        gamma: float = 1e-3,
        eps: float = 1e-8,
        weight_decay: float = 0,
        amsbound: bool = False,
    ) -> None:
        if lr <= 0.0:
            raise ValueError('Invalid learning rate: {}'.format(lr))
        if eps < 0.0:
            raise ValueError('Invalid epsilon value: {}'.format(eps))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError(
                'Invalid beta parameter at index 0: {}'.format(betas[0])
            )
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError(
                'Invalid beta parameter at index 1: {}'.format(betas[1])
            )
        if final_lr < 0.0:
            raise ValueError(
                'Invalid final learning rate: {}'.format(final_lr)
            )
        if not 0.0 <= gamma < 1.0:
            raise ValueError('Invalid gamma parameter: {}'.format(gamma))
        if weight_decay < 0:
            raise ValueError(
                'Invalid weight_decay value: {}'.format(weight_decay)
            )
        defaults = dict(
            lr=lr,
            betas=betas,
            final_lr=final_lr,
            gamma=gamma,
            eps=eps,
            weight_decay=weight_decay,
            amsbound=amsbound,
        )
        super(AdaBound, self).__init__(params, defaults)

        # Remember the initial lr of every param group so that scheduler
        # decay can be propagated to final_lr inside step().
        self.base_lrs = [group['lr'] for group in self.param_groups]

    def __setstate__(self, state: State) -> None:
        super(AdaBound, self).__setstate__(state)
        for group in self.param_groups:
            group.setdefault('amsbound', False)

    def step(self, closure: OptLossClosure = None) -> OptFloat:
        r"""Performs a single optimization step.

        Arguments:
            closure: A closure that reevaluates the model and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group, base_lr in zip(self.param_groups, self.base_lrs):
            for p in group['params']:
                if p.grad is None:
                    continue

                grad = p.grad.data
                if grad.is_sparse:
                    msg = (
                        'AdaBound does not support sparse gradients, '
                        'please consider SparseAdam instead'
                    )
                    raise RuntimeError(msg)

                amsbound = group['amsbound']
                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    # Exponential moving average of gradient values
                    state['exp_avg'] = torch.zeros_like(p)
                    # Exponential moving average of squared gradient values
                    state['exp_avg_sq'] = torch.zeros_like(p)
                    if amsbound:
                        # Maintains max of all exp. moving averages of
                        # squared gradient values
                        state['max_exp_avg_sq'] = torch.zeros_like(p)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                if amsbound:
                    max_exp_avg_sq = state['max_exp_avg_sq']
                beta1, beta2 = group['betas']

                state['step'] += 1

                if group['weight_decay'] != 0:
                    grad = grad.add(p.data, alpha=group['weight_decay'])

                # Decay the first and second moment running average
                # coefficients
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
                if amsbound:
                    # Maintains the maximum of all second moment running
                    # averages until now
                    torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
                    # Use the max. for normalizing the running avg. of grad
                    denom = max_exp_avg_sq.sqrt().add_(group['eps'])
                else:
                    denom = exp_avg_sq.sqrt().add_(group['eps'])

                bias_correction1 = 1 - beta1 ** state['step']
                bias_correction2 = 1 - beta2 ** state['step']
                step_size = (
                    group['lr']
                    * math.sqrt(bias_correction2)
                    / bias_correction1
                )

                # Apply the dynamic bounds on the actual learning rate.
                # An lr scheduler cannot update final_lr directly, so scale
                # it by the current-to-initial lr ratio as a workaround.
                final_lr = group['final_lr'] * group['lr'] / base_lr
                lower_bound = final_lr * (
                    1 - 1 / (group['gamma'] * state['step'] + 1)
                )
                upper_bound = final_lr * (
                    1 + 1 / (group['gamma'] * state['step'])
                )
                step_size = torch.full_like(denom, step_size)
                step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(
                    exp_avg
                )

                p.data.add_(-step_size)

        return loss
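

# A minimal usage sketch, not part of the library API: the model, data,
# and hyperparameters below are hypothetical placeholders chosen only to
# exercise AdaBound end to end. Because this module uses a relative
# import, run it as ``python -m torch_optimizer.adabound``.
if __name__ == '__main__':
    torch.manual_seed(0)
    model = torch.nn.Linear(4, 1)
    optimizer = AdaBound(model.parameters(), lr=1e-3, final_lr=0.1)
    x = torch.randn(16, 4)
    y = torch.randn(16, 1)
    for _ in range(10):
        optimizer.zero_grad()
        loss = torch.nn.functional.mse_loss(model(x), y)
        loss.backward()
        optimizer.step()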