import math

import torch
from torch.optim.optimizer import Optimizer

from .types import Betas2, OptFloat, OptLossClosure, Params

__all__ = ('DiffGrad',)


class DiffGrad(Optimizer):
    r"""Implements DiffGrad algorithm.

    It has been proposed in `DiffGrad: An Optimization Method for
    Convolutional Neural Networks`__.

    Arguments:
        params: iterable of parameters to optimize or dicts defining
            parameter groups
        lr: learning rate (default: 1e-3)
        betas: coefficients used for computing
            running averages of gradient and its square (default: (0.9, 0.999))
        eps: term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay: weight decay (L2 penalty) (default: 0)

    Example:
        >>> import torch_optimizer as optim
        >>> optimizer = optim.DiffGrad(model.parameters(), lr=0.1)
        >>> optimizer.zero_grad()
        >>> loss_fn(model(input), target).backward()
        >>> optimizer.step()

    __ https://arxiv.org/abs/1909.11015

    Note:
        Reference code: https://github.com/shivram1987/diffGrad
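
        The update rule follows Adam, except that the first moment
        estimate is scaled elementwise by a friction coefficient
        ``dfc = 1 / (1 + exp(-abs(previous_grad - grad)))`` before the
        parameter update, so the step shrinks toward half the Adam step
        where successive gradients barely change and approaches the
        full Adam step where they change strongly (see ``step`` below).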
    """

    def __init__(
        self,
        params: Params,
        lr: float = 1e-3,
        betas: Betas2 = (0.9, 0.999),
        eps: float = 1e-8,
        weight_decay: float = 0.0,
    ) -> None:
        if lr <= 0.0:
            raise ValueError('Invalid learning rate: {}'.format(lr))
        if eps < 0.0:
            raise ValueError('Invalid epsilon value: {}'.format(eps))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError(
                'Invalid beta parameter at index 0: {}'.format(betas[0])
            )
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError(
                'Invalid beta parameter at index 1: {}'.format(betas[1])
            )
        if weight_decay < 0.0:
            raise ValueError(
                'Invalid weight_decay value: {}'.format(weight_decay)
            )
        defaults = dict(
            lr=lr, betas=betas, eps=eps, weight_decay=weight_decay
        )
        super(DiffGrad, self).__init__(params, defaults)

    def step(self, closure: OptLossClosure = None) -> OptFloat:
        r"""Performs a single optimization step.

        Arguments:
            closure: A closure that reevaluates the model and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            beta1, beta2 = group['betas']

            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                if grad.is_sparse:
                    msg = (
                        'DiffGrad does not support sparse gradients, '
                        'please consider SparseAdam instead'
                    )
                    raise RuntimeError(msg)

                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    # Exponential moving average of gradient values
                    state['exp_avg'] = torch.zeros_like(p.data)
                    # Exponential moving average of squared gradient values
                    state['exp_avg_sq'] = torch.zeros_like(p.data)
                    # Gradient from the previous step, used below for the
                    # difference-based friction coefficient
                    state['previous_grad'] = torch.zeros_like(p.data)

                exp_avg, exp_avg_sq, previous_grad = (
                    state['exp_avg'],
                    state['exp_avg_sq'],
                    state['previous_grad'],
                )
                state['step'] += 1

                if group['weight_decay'] != 0:
                    grad.add_(p.data, alpha=group['weight_decay'])

                # Decay the first and second moment running average
                # coefficients, as in Adam
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
                denom = exp_avg_sq.sqrt().add_(group['eps'])

                bias_correction1 = 1 - beta1 ** state['step']
                bias_correction2 = 1 - beta2 ** state['step']

                # Compute the diffGrad friction coefficient (dfc): a sigmoid
                # of the absolute change in gradient, in [0.5, 1). It scales
                # the step toward half size where the gradient is nearly
                # unchanged and leaves it near full size where it changes
                # strongly.
                diff = torch.abs(previous_grad - grad)
                dfc = torch.div(1.0, 1.0 + torch.exp(-diff))
                state['previous_grad'] = grad.clone()

                # Apply the friction coefficient to the first moment
                exp_avg1 = exp_avg * dfc

                step_size = (
                    group['lr']
                    * math.sqrt(bias_correction2)
                    / bias_correction1
                )
                p.data.addcdiv_(exp_avg1, denom, value=-step_size)

        return loss
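

# A minimal smoke-test sketch, not part of the reference implementation:
# it fits an illustrative linear model with DiffGrad and prints the final
# loss. The model, data, and hyperparameters are assumptions chosen for
# the demo. Because this module uses a relative import, run it as a
# module, e.g. ``python -m torch_optimizer.diffgrad``.
if __name__ == '__main__':
    torch.manual_seed(0)
    model = torch.nn.Linear(4, 1)
    inputs = torch.randn(64, 4)
    targets = inputs @ torch.ones(4, 1)
    loss_fn = torch.nn.MSELoss()
    optimizer = DiffGrad(model.parameters(), lr=1e-2)
    for _ in range(200):
        optimizer.zero_grad()
        loss = loss_fn(model(inputs), targets)
        loss.backward()
        optimizer.step()
    print('final loss:', loss.item())  # should be close to zero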