import torch
from torch.optim.optimizer import Optimizer

from .types import Betas2, Nus2, OptFloat, OptLossClosure, Params

__all__ = ('QHAdam',)


class QHAdam(Optimizer):
    r"""Implements the QHAdam optimization algorithm.

    It has been proposed in `Quasi-hyperbolic momentum and Adam for deep
    learning`__.

    Arguments:
        params: iterable of parameters to optimize or dicts defining
            parameter groups
        lr: learning rate (default: 1e-3)
        betas: coefficients used for computing
            running averages of gradient and its square (default: (0.9, 0.999))
        nus: immediate discount factors used to estimate the gradient and its
            square (default: (1.0, 1.0))
        eps: term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay: weight decay (L2 penalty) (default: 0)
        decouple_weight_decay: whether to decouple the weight
            decay from the gradient-based optimization step (default: False)

    Example:
        >>> import torch_optimizer as optim
        >>> optimizer = optim.QHAdam(model.parameters(), lr=0.1)
        >>> optimizer.zero_grad()
        >>> loss_fn(model(input), target).backward()
        >>> optimizer.step()
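        >>> # Illustrative values (not this module's defaults) showing the
        >>> # QH-specific arguments; with nus=(1.0, 1.0) the update reduces
        >>> # to plain Adam:
        >>> optimizer = optim.QHAdam(
        ...     model.parameters(),
        ...     lr=1e-3,
        ...     nus=(0.7, 1.0),
        ...     weight_decay=1e-4,
        ...     decouple_weight_decay=True,
        ... )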

    __ https://arxiv.org/abs/1810.06801

    Note:
        Reference code: https://github.com/facebookresearch/qhoptim
    MbP?g?g+?      ?r           F:0yE>paramslrbetasnusweight_decaydecouple_weight_decayepsc           	         s   |dkrt d||dk rt d|d|d   kr"dk s,n t d|d d|d   kr8dk sBn t d|d |dk rMt d	|||||||d
}tt| || d S )Nr   zInvalid learning rate: {}zInvalid epsilon value: {}r   r   z%Invalid beta parameter at index 0: {}r   z%Invalid beta parameter at index 1: {}zInvalid weight_decay value: {})r   r   r   r   r   r   )
ValueErrorformatsuperr	   __init__)	selfr   r   r   r   r   r   r   defaults	__class__ J/home/ubuntu/.local/lib/python3.10/site-packages/torch_optimizer/qhadam.pyr   )   s0   
zQHAdam.__init__Nclosurereturnc                 C   s  d}|dur	| }| j D ]}|d }|d \}}|d \}}|d }	|d }
|d }|d D ]}|jdu r6q.|jj}|jrAtd	| j| }|	d
kr_|
rW|jd||	   n|j|j|	d ||}t	|d
krd|d< d|d< t
|j|d< t
|j|d< d||d   |d< d||d   |d< |d }|d }|d }|d }dd|  }dd|  }||j|d| d ||j|d| d ||}|dkr|j|d| d ||}|dkr|j|d| d |  |dkr|| |jj||| d q.q|S )zPerforms a single optimization step.

        Arguments:
            closure: A closure that reevaluates the model and returns the loss.
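
        Note:
            As a rough sketch of what one step computes (see the paper linked
            in the class docstring): with debiased moment estimates
            ``exp_avg`` and ``exp_avg_sq`` of the (possibly weight-decayed)
            gradient ``g`` and of ``g * g``, each parameter ``p`` is updated
            as::

                p <- p - lr * (nu1 * exp_avg + (1 - nu1) * g)
                         / (sqrt(nu2 * exp_avg_sq + (1 - nu2) * g * g) + eps)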
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            lr = group['lr']
            beta1, beta2 = group['betas']
            nu1, nu2 = group['nus']
            weight_decay = group['weight_decay']
            decouple_weight_decay = group['decouple_weight_decay']
            eps = group['eps']

            for p in group['params']:
                if p.grad is None:
                    continue

                d_p = p.grad.data
                if d_p.is_sparse:
                    raise RuntimeError(
                        'QHAdam does not support sparse gradients, '
                        'please consider SparseAdam instead'
                    )

                state = self.state[p]

                if weight_decay != 0:
                    if decouple_weight_decay:
                        p.data.mul_(1 - lr * weight_decay)
                    else:
                        d_p.add_(p.data, alpha=weight_decay)

                d_p_sq = d_p.mul(d_p)

                if len(state) == 0:
                    state['beta1_weight'] = 0.0
                    state['beta2_weight'] = 0.0
                    state['exp_avg'] = torch.zeros_like(p.data)
                    state['exp_avg_sq'] = torch.zeros_like(p.data)

                state['beta1_weight'] = 1.0 + beta1 * state['beta1_weight']
                state['beta2_weight'] = 1.0 + beta2 * state['beta2_weight']

                beta1_weight = state['beta1_weight']
                beta2_weight = state['beta2_weight']
                exp_avg = state['exp_avg']
                exp_avg_sq = state['exp_avg_sq']

                beta1_adj = 1.0 - 1.0 / beta1_weight
                beta2_adj = 1.0 - 1.0 / beta2_weight
                exp_avg.mul_(beta1_adj).add_(d_p, alpha=1.0 - beta1_adj)
                exp_avg_sq.mul_(beta2_adj).add_(d_p_sq, alpha=1.0 - beta2_adj)

                avg_grad = exp_avg.mul(nu1)
                if nu1 != 1.0:
                    avg_grad.add_(d_p, alpha=1.0 - nu1)

                avg_grad_rms = exp_avg_sq.mul(nu2)
                if nu2 != 1.0:
                    avg_grad_rms.add_(d_p_sq, alpha=1.0 - nu2)
                avg_grad_rms.sqrt_()
                if eps != 0.0:
                    avg_grad_rms.add_(eps)

                p.data.addcdiv_(avg_grad, avg_grad_rms, value=-lr)

        return loss