import torch
from torch.optim.optimizer import Optimizer

from .types import OptFloat, OptLossClosure, Params, State

__all__ = ('SGDW',)


class SGDW(Optimizer):
    r"""Implements SGDW algorithm.

    It has been proposed in `Decoupled Weight Decay Regularization`__.

    Arguments:
        params: iterable of parameters to optimize or dicts defining
            parameter groups
        lr: learning rate (default: 1e-3)
        momentum: momentum factor (default: 0)
        dampening: dampening for momentum (default: 0)
        weight_decay: weight decay coefficient, applied directly to the
            weights rather than as an L2 gradient penalty (default: 0)
        nesterov: enables Nesterov momentum (default: False)

    Example:
        >>> import torch_optimizer as optim
        >>> optimizer = optim.SGDW(model.parameters(), lr=0.1, momentum=0.9)
        >>> optimizer.zero_grad()
        >>> loss_fn(model(input), target).backward()
        >>> optimizer.step()

    __ https://arxiv.org/abs/1711.05101

    Note:
        Reference code: https://github.com/pytorch/pytorch/pull/22466
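
    The decoupled update applied to each parameter :math:`\theta` is

    .. math::
        \theta_{t+1} = (1 - \eta\lambda)\,(\theta_t - \eta\, d_t)

    where :math:`d_t` is the (momentum-adjusted) gradient, :math:`\eta`
    the learning rate and :math:`\lambda` the weight decay coefficient.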
    """

    def __init__(
        self,
        params: Params,
        lr: float = 1e-3,
        momentum: float = 0.0,
        dampening: float = 0.0,
        weight_decay: float = 0.0,
        nesterov: bool = False,
    ) -> None:
        if lr <= 0.0:
            raise ValueError('Invalid learning rate: {}'.format(lr))
        if momentum < 0.0:
            raise ValueError('Invalid momentum value: {}'.format(momentum))
        if dampening < 0.0:
            raise ValueError('Invalid dampening value: {}'.format(dampening))
        if weight_decay < 0.0:
            raise ValueError(
                'Invalid weight_decay value: {}'.format(weight_decay)
            )

        defaults = dict(
            lr=lr,
            momentum=momentum,
            dampening=dampening,
            weight_decay=weight_decay,
            nesterov=nesterov,
        )
        if nesterov and (momentum <= 0 or dampening != 0):
            raise ValueError(
                'Nesterov momentum requires a momentum and zero dampening'
            )
        super(SGDW, self).__init__(params, defaults)

    def __setstate__(self, state: State) -> None:
        super(SGDW, self).__setstate__(state)
        for group in self.param_groups:
            # Older checkpoints may lack the 'nesterov' flag.
            group.setdefault('nesterov', False)

    def step(self, closure: OptLossClosure = None) -> OptFloat:
        r"""Performs a single optimization step.

        Arguments:
            closure: A closure that reevaluates the model and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            weight_decay = group['weight_decay']
            momentum = group['momentum']
            dampening = group['dampening']
            nesterov = group['nesterov']

            for p in group['params']:
                if p.grad is None:
                    continue

                d_p = p.grad.data
                if p.grad.is_sparse:
                    msg = (
                        'SGDW does not support sparse gradients, '
                        'please consider SparseAdam instead'
                    )
                    raise RuntimeError(msg)

                if momentum != 0:
                    param_state = self.state[p]
                    if 'momentum_buffer' not in param_state:
                        # Initialize the buffer with a detached copy of
                        # the gradient on the first step.
                        buf = param_state['momentum_buffer'] = torch.clone(
                            d_p
                        ).detach()
                    else:
                        buf = param_state['momentum_buffer']
                        buf.mul_(momentum).add_(d_p, alpha=1 - dampening)
                    if nesterov:
                        d_p = d_p.add(buf, alpha=momentum)
                    else:
                        d_p = buf

                # Gradient (momentum) step.
                p.data.add_(d_p, alpha=-group['lr'])

                # Decoupled weight decay: shrink the weights directly,
                # p <- p - lr * weight_decay * p, instead of folding an
                # L2 penalty into the gradient.
                if weight_decay != 0:
                    p.data.add_(p.data, alpha=-group['lr'] * weight_decay)

        return loss
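
# A minimal smoke test of the optimizer above; not part of the upstream
# module. It fits a small linear model and prints the final loss, which
# should be well below its starting value. The layer sizes and
# hyperparameters are illustrative only. Because this file uses a relative
# import, run it as a module from an environment where the package is
# installed, e.g. ``python -m torch_optimizer.sgdw``.
if __name__ == '__main__':
    torch.manual_seed(0)
    model = torch.nn.Linear(4, 1)
    optimizer = SGDW(
        model.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4
    )
    x, y = torch.randn(64, 4), torch.randn(64, 1)
    for _ in range(100):
        optimizer.zero_grad()
        loss = torch.nn.functional.mse_loss(model(x), y)
        loss.backward()
        optimizer.step()
    print('final loss:', float(loss))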