import torch
from torch.optim.optimizer import Optimizer


class RangerQH(Optimizer):
    r"""Implements the QHAdam optimization algorithm `(Ma and Yarats, 2019)`_.
    Along with Hinton/Zhang Lookahead.
    Args:
        params (iterable):
            iterable of parameters to optimize or dicts defining parameter
            groups
        lr (float, optional): learning rate (:math:`\alpha` from the paper)
            (default: 1e-3)
        betas (Tuple[float, float], optional): coefficients used for computing
            running averages of the gradient and its square
            (default: (0.9, 0.999))
        nus (Tuple[float, float], optional): immediate discount factors used to
            estimate the gradient and its square
            (default: (1.0, 1.0))
        eps (float, optional): term added to the denominator to improve
            numerical stability
            (default: 1e-8)
        weight_decay (float, optional): weight decay (default: 0.0)
        decouple_weight_decay (bool, optional): whether to decouple the weight
            decay from the gradient-based optimization step
            (default: False)
        k (int, optional): number of optimization steps between Lookahead
            updates of the slow weights
            (default: 6)
        alpha (float, optional): Lookahead interpolation factor applied when
            the slow weights are updated
            (default: 0.5)
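
    The parameter update applied in :meth:`step` is the QHAdam rule

    .. math::

        \theta_{t+1} = \theta_t - \alpha \cdot
            \frac{(1 - \nu_1) \cdot g_t + \nu_1 \cdot \hat{g}_t}
                 {\sqrt{(1 - \nu_2) \cdot g_t^2 + \nu_2 \cdot \hat{s}_t}
                  + \epsilon}

    where :math:`\hat{g}_t` and :math:`\hat{s}_t` are the bias-corrected
    exponential moving averages of the gradient and its elementwise square.
    Every ``k`` steps the slow (Lookahead) copy of each parameter is moved a
    fraction ``alpha`` of the way toward the fast weights, and the fast
    weights are reset to the result.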
    Example:
        >>> optimizer = RangerQH(
        ...     model.parameters(),
        ...     lr=3e-4, nus=(0.8, 1.0), betas=(0.99, 0.999))
        >>> optimizer.zero_grad()
        >>> loss_fn(model(input), target).backward()
        >>> optimizer.step()
    .. _`(Ma and Yarats, 2019)`: https://arxiv.org/abs/1810.06801
    MbP?g?g+?gffffff?      ?                 ?F:0yE>c
                    s   d|kst d|d|	kst d|	d|d   kr"dk s,n t d|d d|d   kr8dk sBn t d|d |dk rMt d	|||||||	d
}
t ||
 || _|| _d S )Nr   zInvalid learning rate: {}zInvalid epsilon value: {}r   r   z%Invalid beta parameter at index 0: {}   z%Invalid beta parameter at index 1: {}zInvalid weight_decay value: {})lrbetasnusweight_decaydecouple_weight_decayeps)
ValueErrorformatsuper__init__alphak)selfparamsr   r   r   r   r   r   r   r   defaults	__class__ K/home/ubuntu/.local/lib/python3.10/site-packages/pytorch_ranger/rangerqh.pyr   4   s(   
zRangerQH.__init__Nc                 C   s  d}|dur	| }| j D ]1}|d }|d \}}|d \}}|d }	|d }
|d }|d D ]}|jdu r8q/|jj}|jrCtd	|	d
kr[|
rT|jd||	   n||	|j ||}| j| }t	|d
krd|d< d|d< d
|d< t
|j|d< t
|j|d< t
|j|d< |d |j |d  d7  < d||d   |d< d||d   |d< |d }|d }|d }|d }dd|  }dd|  }||d| | ||d| | ||}|dkr|d| | ||}|dkr|d| | |  |dkr|| |j| || |d | j d
kr=|d }|| j|j|  |j| q/q|S )zPerforms a single optimization step.
        Args:
            closure (callable, optional):
                A closure that reevaluates the model and returns the loss.
        Nr   r   r   r   r   r   r   z(QHAdam does not support sparse gradientsr   r   r   beta1_weightbeta2_weightstepexp_avg
exp_avg_sqslow_bufferr   )param_groupsgraddata	is_sparseRuntimeErrormul_add_mulstatelentorch
zeros_like
empty_likecopy_sqrt_addcdiv_r   r   )r   closurelossgroupr   beta1beta2nu1nu2r   r   r   pd_pd_p_sqparam_stater    r!   r#   r$   	beta1_adj	beta2_adjavg_gradavg_grad_rmsslow_pr   r   r   r"   Z   st   







DzRangerQH.stepc                 C   s   |j |j|jf|j|jfdS )N)r   r   r   )r   r;   r<   r9   r:   )clsr   r   r   r   _params_to_dict   s   zRangerQH._params_to_dict)r   r   r   r   r	   r
   Fr   )N)	__name__
__module____qualname____doc__r   r"   classmethodrG   __classcell__r   r   r   r   r      s    "
&Xr   )r0   torch.optim.optimizerr   r   r   r   r   r   <module>   s   