import math

import torch
from torch.optim.optimizer import Optimizer

from .types import Betas2, OptFloat, OptLossClosure, Params

__all__ = ('RAdam',)


class RAdam(Optimizer):
    r"""Implements RAdam optimization algorithm.

    It has been proposed in `On the Variance of the Adaptive Learning
    Rate and Beyond`__.

    Arguments:
        params: iterable of parameters to optimize or dicts defining
            parameter groups
        lr: learning rate (default: 1e-3)
        betas: coefficients used for computing
            running averages of gradient and its square (default: (0.9, 0.999))
        eps: term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay: weight decay (L2 penalty) (default: 0)

    Example:
        >>> import torch_optimizer as optim
        >>> optimizer = optim.RAdam(model.parameters(), lr=0.1)
        >>> optimizer.zero_grad()
        >>> loss_fn(model(input), target).backward()
        >>> optimizer.step()

    __ https://arxiv.org/abs/1908.03265

    Note:
        Reference code: https://github.com/LiyuanLucasLiu/RAdam
    MbP?g?g+?:0yE>r   paramslrbetasepsweight_decayreturnNc                    sH  |dkrt d||dk rt d|d|d   kr"dk s,n t d|d d|d   kr8dk sBn t d|d |dk rMt d	|t|ttfrt|dkrt|d tr|D ]%}d
|v r|d
 d |d ks}|d
 d |d krdd tdD |d< qct||||dd tdD d}tt	| 
|| d S )Ng        zInvalid learning rate: {}zInvalid epsilon value: {}r   g      ?z%Invalid beta parameter at index 0: {}r   z%Invalid beta parameter at index 1: {}zInvalid weight_decay value: {}r   c                 S      g | ]}g d qS )NNN .0_r   r   I/home/ubuntu/.local/lib/python3.10/site-packages/torch_optimizer/radam.py
<listcomp>K       z"RAdam.__init__.<locals>.<listcomp>
   bufferc                 S   r   r   r   r   r   r   r   r   R   r   )r   r   r   r   r   )
ValueErrorformat
isinstancelisttuplelendictrangesuperr   __init__)selfr   r   r   r   r   paramdefaults	__class__r   r   r&   (   sF   zRAdam.__init__c                    s   t t| | d S N)r%   r   __setstate__)r'   stater*   r   r   r-   V   s   zRAdam.__setstate__closurec                 C   s  d}|dur	| }| j D ]@}|d }|d }|d \}}|d }|d D ]&}	|	jdu r.q%|	jj }
|
jr=d}t||	j }| j|	 }t|dkr`d|d	< t	||d
< t	||d< n|d
 
||d
< |d 
||d< |d
 |d }}||j|
|
d| d ||j|
d| d |d	  d7  < |d t|d	 d  }|d	 |d kr|d |d }}n_|d	 |d< ||d	  }dd|  d }|d|d	  | d|   }||d< |dkr|td| |d  |d  |d  | | |d   d||d	    }n
|d||d	    }||d< |dkr(|j|| | d |dkr>| |}|j||| d n|j|| d |	j| q%q|S )zPerforms a single optimization step.

        Arguments:
            closure: A closure that reevaluates the model and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            lr = group['lr']
            weight_decay = group['weight_decay']
            beta1, beta2 = group['betas']
            eps = group['eps']

            for p in group['params']:
                if p.grad is None:
                    continue

                grad = p.grad.data.float()
                if grad.is_sparse:
                    msg = (
                        'RAdam does not support sparse gradients, '
                        'please consider SparseAdam instead'
                    )
                    raise RuntimeError(msg)

                p_data_fp32 = p.data.float()

                state = self.state[p]

                if len(state) == 0:
                    state['step'] = 0
                    state['exp_avg'] = torch.zeros_like(p_data_fp32)
                    state['exp_avg_sq'] = torch.zeros_like(p_data_fp32)
                else:
                    state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32)
                    state['exp_avg_sq'] = state['exp_avg_sq'].type_as(
                        p_data_fp32
                    )

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']

                # Update biased first and second moment estimates.
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)

                state['step'] += 1

                # The rectification term depends only on the step count and
                # betas, so it is cached in a small rolling buffer.
                buffered = group['buffer'][int(state['step'] % 10)]
                if state['step'] == buffered[0]:
                    N_sma, step_size = buffered[1], buffered[2]
                else:
                    buffered[0] = state['step']
                    beta2_t = beta2 ** state['step']
                    N_sma_max = 2 / (1 - beta2) - 1
                    N_sma = N_sma_max - 2 * state['step'] * beta2_t / (
                        1 - beta2_t
                    )
                    buffered[1] = N_sma

                    # More conservative since it's an approximated value.
                    if N_sma >= 5:
                        step_size = math.sqrt(
                            (1 - beta2_t)
                            * (N_sma - 4)
                            / (N_sma_max - 4)
                            * (N_sma - 2)
                            / N_sma
                            * N_sma_max
                            / (N_sma_max - 2)
                        ) / (1 - beta1 ** state['step'])
                    else:
                        step_size = 1.0 / (1 - beta1 ** state['step'])
                    buffered[2] = step_size

                if weight_decay != 0:
                    p_data_fp32.add_(p_data_fp32, alpha=-weight_decay * lr)

                # While the variance estimate is tractable take an adaptive
                # (Adam-like) step; otherwise fall back to a plain momentum
                # step without the second-moment denominator.
                if N_sma >= 5:
                    denom = exp_avg_sq.sqrt().add_(eps)
                    p_data_fp32.addcdiv_(
                        exp_avg, denom, value=-step_size * lr
                    )
                else:
                    p_data_fp32.add_(exp_avg, alpha=-step_size * lr)

                p.data.copy_(p_data_fp32)

        return loss
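

# ---------------------------------------------------------------------------
# Minimal smoke-test sketch, not part of the torch_optimizer API: it fits a
# single linear layer to random targets with RAdam. The model and tensor
# names below are illustrative. Because of the relative import above, run it
# as a module, e.g. ``python -m torch_optimizer.radam``.
if __name__ == '__main__':
    torch.manual_seed(0)
    model = torch.nn.Linear(4, 1)
    optimizer = RAdam(model.parameters(), lr=1e-2, weight_decay=1e-4)
    inputs, targets = torch.randn(32, 4), torch.randn(32, 1)
    for _ in range(100):
        optimizer.zero_grad()
        loss = torch.nn.functional.mse_loss(model(inputs), targets)
        loss.backward()
        optimizer.step()
    # The loss should have dropped well below its initial value.
    print('final loss:', loss.item())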