import math

import torch
import torch.nn as nn
from torch.optim.optimizer import Optimizer

from .types import Betas2, OptFloat, OptLossClosure, Params

__all__ = ('Yogi',)


class Yogi(Optimizer):
    r"""Implements Yogi Optimizer Algorithm.
    It has been proposed in `Adaptive methods for Nonconvex Optimization`__.

    Arguments:
        params: iterable of parameters to optimize or dicts defining
            parameter groups
        lr: learning rate (default: 1e-2)
        betas: coefficients used for computing
            running averages of gradient and its square (default: (0.9, 0.999))
        eps: term added to the denominator to improve
            numerical stability (default: 0.001)
        initial_accumulator: initial values for first and
            second moments (default: 1e-6)
        weight_decay: weight decay (L2 penalty) (default: 0)

    Example:
        >>> import torch_optimizer as optim
        >>> optimizer = optim.Yogi(model.parameters(), lr=0.01)
        >>> optimizer.zero_grad()
        >>> loss_fn(model(input), target).backward()
        >>> optimizer.step()

    __ https://papers.nips.cc/paper/8186-adaptive-methods-for-nonconvex-optimization  # noqa

    Note:
        Reference code: https://github.com/4rtemi5/Yogi-Optimizer_Keras
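
    Note:
        A sketch of the second-moment update performed in the step method
        below (elementwise, with g_t the current gradient and v_t the
        exp_avg_sq buffer):

            v_t = v_{t-1} - (1 - beta2) * sign(v_{t-1} - g_t^2) * g_t^2

        Unlike Adam's multiplicative decay of v, this additive rule depends
        only on the sign of v_{t-1} - g_t^2, not its magnitude, which
        controls how quickly the effective learning rate can change.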
    """

    def __init__(
        self,
        params: Params,
        lr: float = 1e-2,
        betas: Betas2 = (0.9, 0.999),
        eps: float = 1e-3,
        initial_accumulator: float = 1e-6,
        weight_decay: float = 0,
    ) -> None:
        if lr <= 0.0:
            raise ValueError('Invalid learning rate: {}'.format(lr))
        if eps < 0.0:
            raise ValueError('Invalid epsilon value: {}'.format(eps))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError(
                'Invalid beta parameter at index 0: {}'.format(betas[0])
            )
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError(
                'Invalid beta parameter at index 1: {}'.format(betas[1])
            )
        if weight_decay < 0:
            raise ValueError(
                'Invalid weight_decay value: {}'.format(weight_decay)
            )

        defaults = dict(
            lr=lr,
            betas=betas,
            eps=eps,
            initial_accumulator=initial_accumulator,
            weight_decay=weight_decay,
        )
        super(Yogi, self).__init__(params, defaults)

    def step(self, closure: OptLossClosure = None) -> OptFloat:
        r"""Performs a single optimization step.

        Arguments:
            closure: A closure that reevaluates the model and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                if grad.is_sparse:
                    raise RuntimeError(
                        'Yogi does not support sparse gradients, '
                        'please consider SparseAdam instead'
                    )

                state = self.state[p]

                # State initialization: both moment buffers start at
                # initial_accumulator rather than zero.
                if len(state) == 0:
                    state['step'] = 0
                    # Exponential moving average of gradient values
                    state['exp_avg'] = nn.init.constant_(
                        torch.empty_like(
                            p.data, memory_format=torch.preserve_format
                        ),
                        group['initial_accumulator'],
                    )
                    # Exponential moving average of squared gradient values
                    state['exp_avg_sq'] = nn.init.constant_(
                        torch.empty_like(
                            p.data, memory_format=torch.preserve_format
                        ),
                        group['initial_accumulator'],
                    )

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['betas']

                state['step'] += 1
                bias_correction1 = 1 - beta1 ** state['step']
                bias_correction2 = 1 - beta2 ** state['step']

                if group['weight_decay'] != 0:
                    grad = grad.add(p.data, alpha=group['weight_decay'])

                # Decay the first moment running average coefficient
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)

                # Yogi second-moment update (additive, sign-based):
                # v_t = v_{t-1} - (1 - beta2) * sign(v_{t-1} - g_t^2) * g_t^2
                grad_squared = grad.mul(grad)
                exp_avg_sq.addcmul_(
                    torch.sign(exp_avg_sq - grad_squared),
                    grad_squared,
                    value=-(1 - beta2),
                )

                denom = (
                    exp_avg_sq.sqrt() / math.sqrt(bias_correction2)
                ).add_(group['eps'])
                step_size = group['lr'] / bias_correction1
                p.data.addcdiv_(exp_avg, denom, value=-step_size)

        return loss