import copy
import math
from typing import Optional

import torch
from torch.optim.optimizer import Optimizer

from .types import OptFloat, OptLossClosure, Params

__all__ = ('A2GradUni', 'A2GradInc', 'A2GradExp')


class A2GradUni(Optimizer):
    r"""Implements A2GradUni Optimizer Algorithm.

    It has been proposed in `Optimal Adaptive and Accelerated Stochastic
    Gradient Descent`__.

    Arguments:
        params: iterable of parameters to optimize or dicts defining
            parameter groups
        lr: not used for this optimizer (default: None)
        beta: coefficient weighting the adaptive term of the step size
            (default: 10)
        lips: Lipschitz constant (default: 10)


    Example:
        >>> import torch_optimizer as optim
        >>> optimizer = optim.A2GradUni(model.parameters(), lips=10)
        >>> optimizer.zero_grad()
        >>> loss_fn(model(input), target).backward()
        >>> optimizer.step()

    __ https://arxiv.org/abs/1810.00553

    Note:
        Reference code: https://github.com/severilov/A2Grad_optimizer
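
    The uniform variant gives equal weight to the squared deviation of
    every past gradient from its running average when it builds the
    adaptive term ``h_k``; compare ``A2GradInc`` and ``A2GradExp``.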
    """

    def __init__(
        self,
        params: Params,
        lr: Optional[float] = None,
        beta: float = 10,
        lips: float = 10,
    ) -> None:
        defaults = dict(lr=lr, beta=beta, lips=lips)
        if beta < 0.0:
            raise ValueError('Invalid beta value: {}'.format(beta))
        if lips < 0.0:
            raise ValueError('Invalid lips value: {}'.format(lips))

        super(A2GradUni, self).__init__(params, defaults)

    def step(self, closure: OptLossClosure = None) -> OptFloat:
        r"""Performs a single optimization step.

        Arguments:
            closure: A closure that reevaluates the model and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data

                state = self.state[p]
                if len(state) == 0:
                    state['step'] = 0
                    state['alpha_k'] = 1
                    state['v_k'] = 0
                    state['avg_grad'] = copy.deepcopy(grad)
                    state['x_k'] = copy.deepcopy(p.data)

                gamma_k = 2 * group['lips'] / (state['step'] + 1)

                # Running average of all gradients seen so far.
                avg_grad = state['avg_grad']
                avg_grad.mul_(state['step'])
                avg_grad.add_(grad)
                avg_grad.div_(state['step'] + 1)

                # Deviation of the current gradient from the running average.
                delta_k = torch.add(grad, avg_grad, alpha=-1)

                # Uniform weighting: squared deviations accumulate undamped.
                state['v_k'] += torch.sum(delta_k * delta_k).item()
                h_k = math.sqrt(state['v_k'])

                alpha_k_1 = 2 / (state['step'] + 3)
                coef = 1 / (gamma_k + group['beta'] * h_k)
                x_k_1 = state['x_k']
                x_k_1.add_(grad, alpha=-coef)

                # Accelerated update: mix the x-sequence into the iterate.
                p.data.mul_(1 - alpha_k_1)
                p.data.add_(x_k_1, alpha=alpha_k_1)
                p.data.add_(
                    grad, alpha=-(1 - alpha_k_1) * state['alpha_k'] * coef
                )

                state['alpha_k'] = alpha_k_1
                state['step'] += 1
        return loss


class A2GradInc(Optimizer):
    r"""Implements A2GradInc Optimizer Algorithm.

    It has been proposed in `Optimal Adaptive and Accelerated Stochastic
    Gradient Descent`__.

    Arguments:
        params: iterable of parameters to optimize or dicts defining
            parameter groups
        lr: not used for this optimizer (default: None)
        beta: coefficient weighting the adaptive term of the step size
            (default: 10)
        lips: Lipschitz constant (default: 10)


    Example:
        >>> import torch_optimizer as optim
        >>> optimizer = optim.A2GradInc(model.parameters(), lips=10)
        >>> optimizer.zero_grad()
        >>> loss_fn(model(input), target).backward()
        >>> optimizer.step()

    __ https://arxiv.org/abs/1810.00553

    Note:
        Reference code: https://github.com/severilov/A2Grad_optimizer
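
    The incremental variant damps the accumulated squared deviations by
    ``(k / (k + 1)) ** 2`` at every step, so recent gradients carry more
    weight in the adaptive term than older ones.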
    """

    def __init__(
        self,
        params: Params,
        lr: Optional[float] = None,
        beta: float = 10,
        lips: float = 10,
    ) -> None:
        if beta < 0.0:
            raise ValueError('Invalid beta value: {}'.format(beta))
        if lips < 0.0:
            raise ValueError('Invalid lips value: {}'.format(lips))
        defaults = dict(lr=lr, beta=beta, lips=lips)
        super(A2GradInc, self).__init__(params, defaults)

    def step(self, closure: OptLossClosure = None) -> OptFloat:
        r"""Performs a single optimization step.

        Arguments:
            closure: A closure that reevaluates the model and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data

                state = self.state[p]
                if len(state) == 0:
                    state['step'] = 0
                    state['alpha_k'] = 1
                    state['v_k'] = 0
                    state['avg_grad'] = copy.deepcopy(grad)
                    state['x_k'] = copy.deepcopy(p.data)

                gamma_k = 2 * group['lips'] / (state['step'] + 1)

                # Running average of all gradients seen so far.
                avg_grad = state['avg_grad']
                avg_grad.mul_(state['step'])
                avg_grad.add_(grad)
                avg_grad.div_(state['step'] + 1)

                # Deviation of the current gradient from the running average.
                delta_k = torch.add(grad, avg_grad, alpha=-1)

                # Incremental weighting: damp the accumulator before adding
                # the new squared deviation, favouring recent gradients.
                state['v_k'] *= (state['step'] / (state['step'] + 1)) ** 2
                state['v_k'] += torch.sum(delta_k * delta_k).item()
                h_k = math.sqrt(state['v_k'])

                alpha_k_1 = 2 / (state['step'] + 3)
                coef = 1 / (gamma_k + group['beta'] * h_k)
                x_k_1 = state['x_k']
                x_k_1.add_(grad, alpha=-coef)

                # Accelerated update: mix the x-sequence into the iterate.
                p.data.mul_(1 - alpha_k_1)
                p.data.add_(x_k_1, alpha=alpha_k_1)
                p.data.add_(
                    grad, alpha=-(1 - alpha_k_1) * state['alpha_k'] * coef
                )

                state['alpha_k'] = alpha_k_1
                state['step'] += 1
        return loss


class A2GradExp(Optimizer):
    r"""Implements A2GradExp Optimizer Algorithm.

    It has been proposed in `Optimal Adaptive and Accelerated Stochastic
    Gradient Descent`__.

    Arguments:
        params: iterable of parameters to optimize or dicts defining
            parameter groups
        lr: not used for this optimizer (default: None)
        beta:  (default: 10)
        lips: Lipschitz constant (default: 10)
        rho: represents the degree of weighting decrease, a constant
            smoothing factor between 0 and 1 (default: 0.5)

    Example:
        >>> import torch_optimizer as optim
        >>> optimizer = optim.A2GradExp(model.parameters(), lips=10)
        >>> optimizer.zero_grad()
        >>> loss_fn(model(input), target).backward()
        >>> optimizer.step()

    __ https://arxiv.org/abs/1810.00553

    Note:
        Reference code: https://github.com/severilov/A2Grad_optimizer
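
    The exponential variant smooths the squared deviations with an
    exponential moving average controlled by ``rho`` and keeps the
    running maximum, so the adaptive term never decreases between steps.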
    """

    def __init__(
        self,
        params: Params,
        lr: Optional[float] = None,
        beta: float = 10,
        lips: float = 10,
        rho: float = 0.5,
    ) -> None:
        if beta < 0.0:
            raise ValueError('Invalid beta value: {}'.format(beta))
        if lips < 0.0:
            raise ValueError('Invalid lips value: {}'.format(lips))
        if rho < 0.0 or rho > 1.0:
            raise ValueError('Invalid rho value: {}'.format(rho))

        defaults = dict(lr=lr, beta=beta, lips=lips, rho=rho)
        super(A2GradExp, self).__init__(params, defaults)

    def step(self, closure: OptLossClosure = None) -> OptFloat:
        r"""Performs a single optimization step.

        Arguments:
            closure: A closure that reevaluates the model and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data

                state = self.state[p]
                if len(state) == 0:
                    state['step'] = 0
                    state['alpha_k'] = 1
                    state['v_k'] = 0
                    state['avg_grad'] = copy.deepcopy(grad)
                    state['x_k'] = copy.deepcopy(p.data)

                gamma_k = 2 * group['lips'] / (state['step'] + 1)

                # Running average of all gradients seen so far.
                avg_grad = state['avg_grad']
                avg_grad.mul_(state['step'])
                avg_grad.add_(grad)
                avg_grad.div_(state['step'] + 1)

                # Deviation of the current gradient from the running average.
                delta_k = torch.add(grad, avg_grad, alpha=-1)

                # Exponential weighting: v_kk is an exponential moving average
                # of the squared deviations; v_k keeps its running maximum.
                if state['step'] == 0:
                    state['v_kk'] = torch.sum(delta_k * delta_k).item()
                else:
                    state['v_kk'] *= group['rho']
                    state['v_kk'] += (1 - group['rho']) * torch.sum(
                        delta_k * delta_k
                    ).item()
                state['v_k'] = max(state['v_kk'], state['v_k'])
                h_k = math.sqrt((state['step'] + 1) * state['v_k'])

                alpha_k_1 = 2 / (state['step'] + 3)
                coef = 1 / (gamma_k + group['beta'] * h_k)
                x_k_1 = state['x_k']
                x_k_1.add_(grad, alpha=-coef)

                # Accelerated update: mix the x-sequence into the iterate.
                p.data.mul_(1 - alpha_k_1)
                p.data.add_(x_k_1, alpha=alpha_k_1)
                p.data.add_(
                    grad, alpha=-(1 - alpha_k_1) * state['alpha_k'] * coef
                )

                state['alpha_k'] = alpha_k_1
                state['step'] += 1
        return loss
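

# Minimal usage sketch: fit a tiny linear model with each A2Grad variant on a
# synthetic least-squares problem. This block is illustrative only; because of
# the relative import above, the module is meant to be used through the
# installed ``torch_optimizer`` package rather than executed as a script.
if __name__ == '__main__':
    torch.manual_seed(0)

    # Synthetic data: y = x @ w_true plus a little noise.
    x = torch.randn(128, 3)
    w_true = torch.tensor([[1.0], [-2.0], [0.5]])
    y = x @ w_true + 0.01 * torch.randn(128, 1)

    for opt_cls, extra in (
        (A2GradUni, {}),
        (A2GradInc, {}),
        (A2GradExp, {'rho': 0.5}),
    ):
        model = torch.nn.Linear(3, 1, bias=False)
        optimizer = opt_cls(model.parameters(), beta=10, lips=10, **extra)
        for _ in range(200):
            optimizer.zero_grad()
            loss = torch.nn.functional.mse_loss(model(x), y)
            loss.backward()
            optimizer.step()
        print('{}: final loss {:.6f}'.format(opt_cls.__name__, loss.item()))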