""" SGDP Optimizer Implementation copied from https://github.com/clovaai/AdamP/blob/master/adamp/sgdp.py

Paper: `Slowing Down the Weight Norm Increase in Momentum-based Optimizers` - https://arxiv.org/abs/2006.08217
Code: https://github.com/clovaai/AdamP

Copyright (c) 2020-present NAVER Corp.
MIT license
"""

import torch
import torch.nn.functional as F
from torch.optim.optimizer import Optimizer, required
import math

from .adamp import projection


class SGDP(Optimizer):
    def __init__(self, params, lr=required, momentum=0, dampening=0,
                 weight_decay=0, nesterov=False, eps=1e-8, delta=0.1, wd_ratio=0.1):
        defaults = dict(
            lr=lr, momentum=momentum, dampening=dampening, weight_decay=weight_decay,
            nesterov=nesterov, eps=eps, delta=delta, wd_ratio=wd_ratio)
        super(SGDP, self).__init__(params, defaults)

    @torch.no_grad()
    def step(self, closure=None):
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            weight_decay = group['weight_decay']
            momentum = group['momentum']
            dampening = group['dampening']
            nesterov = group['nesterov']

            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad
                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['momentum'] = torch.zeros_like(p)

                # SGD momentum buffer update
                buf = state['momentum']
                buf.mul_(momentum).add_(grad, alpha=1. - dampening)
                if nesterov:
                    d_p = grad + momentum * buf
                else:
                    d_p = buf

                # Projection: for multi-dimensional parameters, project out the
                # radial component of the update and compute a weight decay ratio
                wd_ratio = 1.
                if len(p.shape) > 1:
                    d_p, wd_ratio = projection(p, grad, d_p, group['delta'], group['wd_ratio'], group['eps'])

                # Weight decay, scaled by the projection ratio
                if weight_decay != 0:
                    p.mul_(1. - group['lr'] * group['weight_decay'] * wd_ratio / (1 - momentum))

                # Step
                p.add_(d_p, alpha=-group['lr'])

        return loss
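
# A minimal usage sketch (illustrative, not part of the upstream file). The
# model and hyperparameter values below are assumptions for demonstration;
# the module is imported via the installed timm package, since the relative
# import of `projection` above prevents running this file directly.
#
#     import torch
#     import torch.nn as nn
#     from timm.optim.sgdp import SGDP
#
#     model = nn.Linear(16, 4)
#     optimizer = SGDP(model.parameters(), lr=0.1, momentum=0.9,
#                      nesterov=True, weight_decay=1e-4)
#
#     loss = nn.functional.mse_loss(model(torch.randn(8, 16)), torch.randn(8, 4))
#     loss.backward()
#     optimizer.step()      # momentum step with projection-scaled weight decay
#     optimizer.zero_grad()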