import torch
from torch.optim.optimizer import Optimizer

from .types import Betas2, OptFloat, OptLossClosure, Params, State

__all__ = ('SWATS',)


class SWATS(Optimizer):
    r"""Implements SWATS Optimizer Algorithm.
    It has been proposed in `Improving Generalization Performance by
    Switching from Adam to SGD`__.

    Arguments:
        params: iterable of parameters to optimize or dicts defining
            parameter groups
        lr: learning rate (default: 1e-3)
        betas: coefficients used for computing
            running averages of gradient and its square (default: (0.9, 0.999))
        eps: term added to the denominator to improve
            numerical stability (default: 1e-3)
        weight_decay: weight decay (L2 penalty) (default: 0)
        amsgrad: whether to use the AMSGrad variant of this
            algorithm from the paper `On the Convergence of Adam and Beyond`
            (default: False)
        nesterov: enables Nesterov momentum (default: False)


    Example:
        >>> import torch_optimizer as optim
        >>> optimizer = optim.SWATS(model.parameters(), lr=0.01)
        >>> optimizer.zero_grad()
        >>> loss_fn(model(input), target).backward()
        >>> optimizer.step()

    __ https://arxiv.org/pdf/1712.07628.pdf

    Note:
        Reference code: https://github.com/Mrpatekful/swats
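
    Note:
        Each parameter group carries a ``phase`` entry that starts as
        ``'ADAM'`` and is flipped to ``'SGD'`` by ``step()`` once the
        switching criterion is met; the group's ``lr`` is then replaced by
        the estimated SGD learning rate. The current phase can be checked
        via ``optimizer.param_groups[0]['phase']`` (see the illustrative
        sketch at the bottom of this module).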
    """

    def __init__(
        self,
        params: Params,
        lr: float = 1e-3,
        betas: Betas2 = (0.9, 0.999),
        eps: float = 1e-3,
        weight_decay: float = 0,
        amsgrad: bool = False,
        nesterov: bool = False,
    ) -> None:
        if not 0.0 <= lr:
            raise ValueError('Invalid learning rate: {}'.format(lr))
        if not 0.0 <= eps:
            raise ValueError('Invalid epsilon value: {}'.format(eps))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError(
                'Invalid beta parameter at index 0: {}'.format(betas[0])
            )
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError(
                'Invalid beta parameter at index 1: {}'.format(betas[1])
            )
        if weight_decay < 0:
            raise ValueError(
                'Invalid weight_decay value: {}'.format(weight_decay)
            )

        defaults = dict(
            lr=lr,
            betas=betas,
            eps=eps,
            phase='ADAM',
            weight_decay=weight_decay,
            amsgrad=amsgrad,
            nesterov=nesterov,
        )
        super().__init__(params, defaults)

    def __setstate__(self, state: State) -> None:
        super().__setstate__(state)
        for group in self.param_groups:
            group.setdefault('amsgrad', False)
            group.setdefault('nesterov', False)

    def step(self, closure: OptLossClosure = None) -> OptFloat:
        r"""Performs a single optimization step.

        Arguments:
            closure: A closure that reevaluates the model and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for w in group['params']:
                if w.grad is None:
                    continue
                grad = w.grad.data

                if grad.is_sparse:
                    raise RuntimeError(
                        'Adam does not support sparse gradients, '
                        'please consider SparseAdam instead'
                    )

                amsgrad = group['amsgrad']

                state = self.state[w]

                # state initialization
                if len(state) == 0:
                    state['step'] = 0
                    # exponential moving average of gradient values
                    state['exp_avg'] = torch.zeros_like(w.data)
                    # exponential moving average of squared gradient values
                    state['exp_avg_sq'] = torch.zeros_like(w.data)
                    # moving average for the non-orthogonal scaling estimate
                    state['exp_avg2'] = w.new(1).fill_(0)
                    if amsgrad:
                        # maintains max of all exp. moving avg.
                        # of sq. grad. values
                        state['max_exp_avg_sq'] = torch.zeros_like(w.data)

                exp_avg, exp_avg2, exp_avg_sq = (
                    state['exp_avg'],
                    state['exp_avg2'],
                    state['exp_avg_sq'],
                )

                if amsgrad:
                    max_exp_avg_sq = state['max_exp_avg_sq']
                beta1, beta2 = group['betas']

                state['step'] += 1

                if group['weight_decay'] != 0:
                    grad.add_(w.data, alpha=group['weight_decay'])

                # if this is the SGD phase, take an SGD step and continue
                if group['phase'] == 'SGD':
                    if 'momentum_buffer' not in state:
                        buf = state['momentum_buffer'] = torch.clone(
                            grad
                        ).detach()
                    else:
                        buf = state['momentum_buffer']
                        buf.mul_(beta1).add_(grad)
                        grad = buf

                    grad.mul_(1 - beta1)
                    if group['nesterov']:
                        grad.add_(buf, alpha=beta1)

                    w.data.add_(grad, alpha=-group['lr'])
                    continue

                # decay the first and second moment running average
                # coefficients
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)

                if amsgrad:
                    # maintains the maximum of all second moment
                    # running averages until now
                    torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
                    # use the max. for normalizing the running avg. of gradient
                    denom = max_exp_avg_sq.sqrt().add_(group['eps'])
                else:
                    denom = exp_avg_sq.sqrt().add_(group['eps'])

                bias_correction1 = 1 - beta1 ** state['step']
                bias_correction2 = 1 - beta2 ** state['step']
                step_size = (
                    group['lr'] * (bias_correction2 ** 0.5) / bias_correction1
                )

                p = -step_size * (exp_avg / denom)
                w.data.add_(p)

                p_view = p.view(-1)
                pg = p_view.dot(grad.view(-1))

                if pg != 0:
                    # the non-orthogonal scaling estimate
                    scaling = p_view.dot(p_view) / -pg
                    exp_avg2.mul_(beta2).add_(scaling, alpha=1 - beta2)

                    # bias-corrected exponential average
                    corrected_exp_avg = exp_avg2 / bias_correction2

                    # checking criteria of switching to SGD training
                    if (
                        state['step'] > 1
                        and corrected_exp_avg.allclose(scaling, rtol=1e-6)
                        and corrected_exp_avg > 0
                    ):
                        group['phase'] = 'SGD'
                        group['lr'] = corrected_exp_avg.item()
        return loss
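

if __name__ == '__main__':
    # Illustrative usage sketch, not part of the original module: fit a tiny
    # least-squares problem and report each step at which a parameter group's
    # ``phase`` flag flips from 'ADAM' to 'SGD'. Whether (and when) the switch
    # triggers depends on the problem; this only reports what happened.
    # Because of the relative import above, run it as a module, e.g.
    # ``python -m torch_optimizer.swats``.
    torch.manual_seed(0)
    w = torch.randn(10, requires_grad=True)
    target = torch.randn(10)
    optimizer = SWATS([w], lr=1e-2)
    for i in range(1, 1001):
        optimizer.zero_grad()
        loss = ((w - target) ** 2).sum()
        loss.backward()
        optimizer.step()
        if optimizer.param_groups[0]['phase'] == 'SGD':
            print('switched to SGD at step {} with lr={:.3e}'.format(
                i, optimizer.param_groups[0]['lr']
            ))
            break
    else:
        print('still in ADAM phase after 1000 steps')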