import torch
from torch.optim.optimizer import Optimizer

from .types import Betas2, OptFloat, OptLossClosure, Params

__all__ = ('NovoGrad',)


class NovoGrad(Optimizer):
    r"""Implements Novograd optimization algorithm.

    It has been proposed in `Stochastic Gradient Methods with Layer-wise
    Adaptive Moments for Training of Deep Networks`__.

    Arguments:
        params: iterable of parameters to optimize or dicts defining
            parameter groups
        lr: learning rate (default: 1e-3)
        betas: coefficients used for computing
            running averages of gradient and its square (default: (0.95, 0))
        eps: term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay: weight decay (L2 penalty) (default: 0)
        grad_averaging: gradient averaging (default: False)
        amsgrad: whether to use the AMSGrad variant of this
            algorithm from the paper `On the Convergence of Adam and Beyond`
            (default: False)

    Example:
        >>> import torch_optimizer as optim
        >>> from torch.optim.lr_scheduler import StepLR
        >>> optimizer = optim.NovoGrad(model.parameters(), lr=0.1)
        >>> optimizer.zero_grad()
        >>> loss_fn(model(input), target).backward()
        >>> scheduler = StepLR(optimizer, step_size=1, gamma=0.7)
        >>> optimizer.step()
        >>> scheduler.step()

    __ https://arxiv.org/abs/1905.11286

    Note:
        Reference code: https://github.com/NVIDIA/DeepLearningExamples
    MbP?gffffff?r   :0yE>r   Fparamslrbetasepsweight_decaygrad_averagingamsgradc           	         s   |dkrt d||dk rt d|d|d   kr"dk s,n t d|d d|d   kr8dk sBn t d|d |dk rMt d	|t||||||d
}tt| || d S )Ng        zInvalid learning rate: {}zInvalid epsilon value: {}r   g      ?z%Invalid beta parameter at index 0: {}r   z%Invalid beta parameter at index 1: {}zInvalid weight_decay value: {})r   r   r   r   r   r   )
ValueErrorformatdictsuperr   __init__)	selfr   r   r   r   r   r   r   defaults	__class__ V/home/ubuntu/SoloSpeech/.venv/lib/python3.10/site-packages/torch_optimizer/novograd.pyr   ,   s2   
	zNovoGrad.__init__statereturnNc                    s,   t t| | | jD ]}|dd qd S )Nr   F)r   r   __setstate__param_groups
setdefault)r   r   groupr   r   r   r    Q   s   
zNovoGrad.__setstate__closurec                 C   s  d}|dur	| }| j D ]}|d D ]}|jdu rq|jj}|jr'd}t||d }| j| }t|dkr^d|d< t|j|d< t	g 
|d j|d< |r^t	g 
|d j|d	< |d |d }	}
|rm|d	 }|d
 \}}|d  d7  < tt|d}|
dkr|
| n|
|j|d| d |rtj||
|d | |d }n	|
 |d }|| |d dkr|j|j|d d |d r|d|  |	|| |jj|	|d  d qq|S )zPerforms a single optimization step.

        Arguments:
            closure: A closure that reevaluates the model and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                if grad.is_sparse:
                    msg = (
                        'NovoGrad does not support sparse gradients, '
                        'please consider SparseAdam instead'
                    )
                    raise RuntimeError(msg)

                amsgrad = group['amsgrad']
                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    # Exponential moving average of gradient values
                    state['exp_avg'] = torch.zeros_like(p.data)
                    # Scalar (per parameter tensor) moving average of the
                    # squared gradient norm
                    state['exp_avg_sq'] = torch.zeros([]).to(
                        state['exp_avg'].device
                    )
                    if amsgrad:
                        # Maintains the max of all moving averages of the
                        # squared gradient norm
                        state['max_exp_avg_sq'] = torch.zeros([]).to(
                            state['exp_avg'].device
                        )

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                if amsgrad:
                    max_exp_avg_sq = state['max_exp_avg_sq']
                beta1, beta2 = group['betas']

                state['step'] += 1

                # Layer-wise second moment: running average of ||grad||^2
                norm = torch.sum(torch.pow(grad, 2))
                if exp_avg_sq == 0:
                    exp_avg_sq.copy_(norm)
                else:
                    exp_avg_sq.mul_(beta2).add_(norm, alpha=1 - beta2)

                if amsgrad:
                    # Maintain the maximum of all second moment running
                    # averages seen so far and use it for normalization
                    torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
                    denom = max_exp_avg_sq.sqrt().add_(group['eps'])
                else:
                    denom = exp_avg_sq.sqrt().add_(group['eps'])

                # Normalize the gradient, apply weight decay and momentum,
                # then update the parameters
                grad.div_(denom)
                if group['weight_decay'] != 0:
                    grad.add_(p.data, alpha=group['weight_decay'])
                if group['grad_averaging']:
                    grad.mul_(1 - beta1)
                exp_avg.mul_(beta1).add_(grad)

                p.data.add_(exp_avg, alpha=-group['lr'])

        return loss
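

# Illustrative usage sketch, assuming only stock PyTorch: a toy regression
# run that exercises NovoGrad end to end. The model, data, and
# hyperparameters below are hypothetical and not part of the library.
# Because this module uses a relative import, run it as
# ``python -m torch_optimizer.novograd``.
if __name__ == '__main__':
    torch.manual_seed(0)
    model = torch.nn.Linear(10, 1)
    optimizer = NovoGrad(model.parameters(), lr=1e-2)
    x = torch.randn(64, 10)
    y = torch.randn(64, 1)
    for _ in range(10):
        optimizer.zero_grad()
        loss = torch.nn.functional.mse_loss(model(x), y)
        loss.backward()
        optimizer.step()
    print('final loss:', loss.item())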