import math

import torch
from torch.optim.optimizer import Optimizer

from .types import OptFloat, OptLossClosure, Params

__all__ = ('SGDP',)


class SGDP(Optimizer):
    r"""Implements SGDP algorithm.

    It has been proposed in `Slowing Down the Weight Norm Increase in
    Momentum-based Optimizers`__

    Arguments:
        params: iterable of parameters to optimize or dicts defining
            parameter groups
        lr: learning rate (default: 1e-3)
        momentum: momentum factor (default: 0)
        dampening: dampening for momentum (default: 0)
        eps: term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay: weight decay (L2 penalty) (default: 0)
        delta: threshold that determines whether a set of parameters is scale
            invariant or not (default: 0.1)
        wd_ratio: relative weight decay applied on scale-invariant parameters
            compared to that applied on scale-variant parameters (default: 0.1)
        nesterov: enables Nesterov momentum (default: False)


    Example:
        >>> import torch_optimizer as optim
        >>> optimizer = optim.SGDP(model.parameters(), lr=0.1)
        >>> optimizer.zero_grad()
        >>> loss_fn(model(input), target).backward()
        >>> optimizer.step()
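
        A closure variant also works, since ``step`` accepts an optional
        closure that re-evaluates the model and returns the loss:

        >>> def closure():
        ...     optimizer.zero_grad()
        ...     loss = loss_fn(model(input), target)
        ...     loss.backward()
        ...     return loss
        >>> optimizer.step(closure)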

    __ https://arxiv.org/abs/2006.08217

    Note:
        Reference code: https://github.com/clovaai/AdamP
    """

    def __init__(
        self,
        params: Params,
        lr: float = 1e-3,
        momentum: float = 0,
        dampening: float = 0,
        eps: float = 1e-8,
        weight_decay: float = 0,
        delta: float = 0.1,
        wd_ratio: float = 0.1,
        nesterov: bool = False,
    ) -> None:
        if lr <= 0.0:
            raise ValueError('Invalid learning rate: {}'.format(lr))
        if eps < 0.0:
            raise ValueError('Invalid epsilon value: {}'.format(eps))
        if momentum < 0.0:
            raise ValueError('Invalid momentum value: {}'.format(momentum))
        if dampening < 0.0:
            raise ValueError('Invalid dampening value: {}'.format(dampening))
        if weight_decay < 0.0:
            raise ValueError(
                'Invalid weight_decay value: {}'.format(weight_decay)
            )
        if delta < 0.0:
            raise ValueError('Invalid delta value: {}'.format(delta))
        if wd_ratio < 0.0:
            raise ValueError('Invalid wd_ratio value: {}'.format(wd_ratio))

        defaults = dict(
            lr=lr,
            momentum=momentum,
            dampening=dampening,
            eps=eps,
            weight_decay=weight_decay,
            delta=delta,
            wd_ratio=wd_ratio,
            nesterov=nesterov,
        )
        super(SGDP, self).__init__(params, defaults)

    @staticmethod
    def _channel_view(x):
        # Flatten each output channel (first dimension) into a row.
        return x.view(x.size(0), -1)

    @staticmethod
    def _layer_view(x):
        # Flatten the whole tensor into a single row.
        return x.view(1, -1)

    @staticmethod
    def _cosine_similarity(x, y, eps, view_func):
        # Row-wise absolute cosine similarity between x and y under the
        # given flattening (per channel or per layer).
        x = view_func(x)
        y = view_func(y)

        x_norm = x.norm(dim=1).add_(eps)
        y_norm = y.norm(dim=1).add_(eps)
        dot = (x * y).sum(dim=1)

        return dot.abs() / x_norm / y_norm

    def _projection(self, p, grad, perturb, delta, wd_ratio, eps):
        # When the gradient is nearly perpendicular to the weights (cosine
        # similarity below delta / sqrt(dim)), the parameter is treated as
        # scale invariant: the radial component of the update is removed
        # and weight decay is rescaled by wd_ratio.
        wd = 1
        expand_size = [-1] + [1] * (len(p.shape) - 1)
        for view_func in [self._channel_view, self._layer_view]:
            cosine_sim = self._cosine_similarity(grad, p.data, eps, view_func)

            if cosine_sim.max() < delta / math.sqrt(
                view_func(p.data).size(1)
            ):
                p_n = p.data / view_func(p.data).norm(dim=1).view(
                    expand_size
                ).add_(eps)
                perturb -= p_n * view_func(p_n * perturb).sum(dim=1).view(
                    expand_size
                )
                wd = wd_ratio

                return perturb, wd

        return perturb, wd

    def step(self, closure: OptLossClosure = None) -> OptFloat:
        r"""Performs a single optimization step.

        Arguments:
            closure: A closure that reevaluates the model and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            weight_decay = group['weight_decay']
            momentum = group['momentum']
            dampening = group['dampening']
            nesterov = group['nesterov']

            for p in group['params']:
                if p.grad is None:
                    continue

                grad = p.grad.data
                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['momentum'] = torch.zeros_like(p.data)

                # Plain SGD update with (optionally Nesterov) momentum
                buf = state['momentum']
                buf.mul_(momentum).add_(grad, alpha=1 - dampening)
                if nesterov:
                    d_p = grad + momentum * buf
                else:
                    d_p = buf

                # Projection: only multi-dimensional parameters can be
                # scale invariant, so 1D tensors (biases, norm gains)
                # skip it
                wd_ratio = 1
                if len(p.shape) > 1:
                    d_p, wd_ratio = self._projection(
                        p,
                        grad,
                        d_p,
                        group['delta'],
                        group['wd_ratio'],
                        group['eps'],
                    )

                # Weight decay, reduced by wd_ratio for scale-invariant
                # parameters
                if weight_decay > 0:
                    p.data.mul_(
                        1
                        - group['lr']
                        * weight_decay
                        * wd_ratio
                        / (1 - momentum)
                    )

                # Step
                p.data.add_(d_p, alpha=-group['lr'])

        return loss
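

if __name__ == '__main__':
    # Minimal smoke test (a sketch, not part of the library API): fit a toy
    # linear regression with SGDP. The seed, data, and hyperparameters below
    # are illustrative assumptions, not reference settings. Because this
    # module uses a relative import, run it as
    # `python -m torch_optimizer.sgdp` rather than executing the file
    # directly.
    torch.manual_seed(0)
    model = torch.nn.Linear(2, 1)
    x = torch.randn(64, 2)
    y = x @ torch.tensor([[2.0], [-3.0]]) + 1.0

    optimizer = SGDP(
        model.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4
    )
    for _ in range(200):
        optimizer.zero_grad()
        loss = torch.nn.functional.mse_loss(model(x), y)
        loss.backward()
        optimizer.step()
    print('final loss: {:.6f}'.format(loss.item()))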