import math

import torch
from torch.optim.optimizer import Optimizer


class NAdamLegacy(Optimizer):
    """Implements Nadam algorithm (a variant of Adam based on Nesterov momentum).

    NOTE: This impl has been deprecated in favour of torch.optim.NAdam and remains as a reference

    It has been proposed in `Incorporating Nesterov Momentum into Adam`__.

    Arguments:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): learning rate (default: 2e-3)
        betas (Tuple[float, float], optional): coefficients used for computing
            running averages of gradient and its square
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        schedule_decay (float, optional): momentum schedule decay (default: 4e-3)

    __ http://cs229.stanford.edu/proj2015/054_report.pdf
    __ http://www.cs.toronto.edu/~fritz/absps/momentum.pdf

        Originally taken from: https://github.com/pytorch/pytorch/pull/1408
        NOTE: Has potential issues but does work well on some problems.
    Mb`?g?g+?:0yE>r   Mbp?c                    s>   d|kst d|t|||||d}tt| || d S )Ng        zInvalid learning rate: {})lrbetasepsweight_decayschedule_decay)
ValueErrorformatdictsuperr   __init__)selfparamsr   r	   r
   r   r   defaults	__class__ D/home/ubuntu/.local/lib/python3.10/site-packages/timm/optim/nadam.pyr       s   	zNAdamLegacy.__init__Nc                 C   s  d}|durt   | }W d   n1 sw   Y  | jD ]}|d D ]}|jdu r.q&|j}| j| }t|dkrRd|d< d|d< t ||d< t ||d< |d }|d	 }|d |d }	}
|d
 \}}|d }|d  d7  < |d }d||  }|d dkr|j||d d}|ddd||     }|ddd|d |     }|| }|| | }||d< |	|j	|d| d |
|j
||d| d |
 t| 	|}|j|||d  d|  d|  d |j|	||d  | d|  d q&q |S )zPerforms a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        Nr   r   stepg      ?
m_scheduleexp_avg
exp_avg_sqr   r	   r
      r   )alphag      ?gQ?)valuer   )torchenable_gradparam_groupsgradstatelen
zeros_likeaddmul_add_addcmul_sqrtmathaddcdiv_)r   closurelossgrouppr#   r$   r   r   r   r   beta1beta2r
   tbias_correction2momentum_cache_tmomentum_cache_t_1m_schedule_newm_schedule_nextdenomr   r   r   r   4   sL   



&$(zNAdamLegacy.step)r   r   r   r   r   )N)	__name__
__module____qualname____doc__r   r    no_gradr   __classcell__r   r   r   r   r      s    r   )r,   r    torch.optim.optimizerr   r   r   r   r   r   <module>   s    