import math

import torch
from torch.optim.optimizer import Optimizer

from .types import Betas2, OptFloat, OptLossClosure, Params

__all__ = ('Lamb',)


class Lamb(Optimizer):
    r"""Implements Lamb algorithm.

    It has been proposed in `Large Batch Optimization for Deep Learning:
    Training BERT in 76 minutes`__.

    Arguments:
        params: iterable of parameters to optimize or dicts defining
            parameter groups
        lr: learning rate (default: 1e-3)
        betas: coefficients used for computing
            running averages of gradient and its square (default: (0.9, 0.999))
        eps: term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay: weight decay (L2 penalty) (default: 0)
        clamp_value: clamp weight_norm in (0, clamp_value) (default: 10)
            set to a high value to avoid clamping (e.g. 10e3)
        adam: always use trust ratio = 1, which turns this
            into Adam. Useful for comparison purposes. (default: False)
        debias: debias adam by (1 - beta**step) (default: False)

    Example:
        >>> import torch_optimizer as optim
        >>> optimizer = optim.Lamb(model.parameters(), lr=0.1)
        >>> optimizer.zero_grad()
        >>> loss_fn(model(input), target).backward()
        >>> optimizer.step()

    __ https://arxiv.org/abs/1904.00962

    Note:
        Reference code: https://github.com/cybertronai/pytorch-lamb
    """

    def __init__(
        self,
        params: Params,
        lr: float = 1e-3,
        betas: Betas2 = (0.9, 0.999),
        eps: float = 1e-6,
        weight_decay: float = 0,
        clamp_value: float = 10,
        adam: bool = False,
        debias: bool = False,
    ) -> None:
        if lr <= 0.0:
            raise ValueError('Invalid learning rate: {}'.format(lr))
        if eps < 0.0:
            raise ValueError('Invalid epsilon value: {}'.format(eps))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError(
                'Invalid beta parameter at index 0: {}'.format(betas[0])
            )
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError(
                'Invalid beta parameter at index 1: {}'.format(betas[1])
            )
        if weight_decay < 0:
            raise ValueError(
                'Invalid weight_decay value: {}'.format(weight_decay)
            )
        if clamp_value < 0.0:
            raise ValueError('Invalid clamp value: {}'.format(clamp_value))

        defaults = dict(
            lr=lr, betas=betas, eps=eps, weight_decay=weight_decay
        )
        self.clamp_value = clamp_value
        self.adam = adam
        self.debias = debias

        super(Lamb, self).__init__(params, defaults)

    def step(self, closure: OptLossClosure = None) -> OptFloat:
        r"""Performs a single optimization step.

        Arguments:
            closure: A closure that reevaluates the model and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                if grad.is_sparse:
                    msg = (
                        'Lamb does not support sparse gradients, '
                        'please consider SparseAdam instead'
                    )
                    raise RuntimeError(msg)

                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    # Exponential moving average of gradient values
                    state['exp_avg'] = torch.zeros_like(p)
                    # Exponential moving average of squared gradient values
                    state['exp_avg_sq'] = torch.zeros_like(p)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['betas']

                state['step'] += 1

                # Decay the first and second moment running average coefficients
                # m_t
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
                # v_t
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)

                # The LAMB paper (v3) does not use debiasing.
                if self.debias:
                    bias_correction = math.sqrt(1 - beta2 ** state['step'])
                    bias_correction /= 1 - beta1 ** state['step']
                else:
                    bias_correction = 1

                # Fold the bias correction into the step size.
                step_size = group['lr'] * bias_correction

                weight_norm = torch.norm(p.data).clamp(0, self.clamp_value)

                adam_step = exp_avg / exp_avg_sq.sqrt().add(group['eps'])
                if group['weight_decay'] != 0:
                    adam_step.add_(p.data, alpha=group['weight_decay'])

                adam_norm = torch.norm(adam_step)
                if weight_norm == 0 or adam_norm == 0:
                    trust_ratio = 1
                else:
                    trust_ratio = weight_norm / adam_norm
                state['weight_norm'] = weight_norm
                state['adam_norm'] = adam_norm
                state['trust_ratio'] = trust_ratio
                if self.adam:
                    trust_ratio = 1

                p.data.add_(adam_step, alpha=-step_size * trust_ratio)

        return loss
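
# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original module). It assumes a toy
# linear-regression task with synthetic data and only illustrates how the
# Lamb optimizer above is driven; run it with `python -m torch_optimizer.lamb`
# so the relative import at the top of the file resolves.
if __name__ == '__main__':
    torch.manual_seed(0)
    model = torch.nn.Linear(4, 1)
    optimizer = Lamb(model.parameters(), lr=0.1, weight_decay=0.01)
    inputs, targets = torch.randn(64, 4), torch.randn(64, 1)
    for epoch in range(5):
        optimizer.zero_grad()
        loss = torch.nn.functional.mse_loss(model(inputs), targets)
        loss.backward()
        optimizer.step()
        print(f'epoch {epoch}: loss={loss.item():.4f}')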