o
    }oi$                     @   sv  d dl mZ d dlmZ d dlmZmZmZmZ d dl	m
Z
mZ g dZeG dd dZeG dd	 d	eZeG d
d deZeG dd deZeG dd deZeG dd deZeG dd deZeG dd deZeG dd deZeG dd deZeG dd deZdedefddZded eeeef  d!efd"d#Zeeeeeeeeeeed$Zd%S )&    )	dataclass)partial)AnyDictOptionalTuple)MISSING	OmegaConf)
OptimizerParams
AdamParamsNovogradParams	SGDParamsAdadeltaParamsAdamaxParamsAdagradParamsAdamWParamsRMSpropParamsRpropParamsc                   @   s"   e Zd ZU dZeZee ed< dS )r
   zw
    Base Optimizer params with no values. User can chose it to explicitly override via
    command line arguments
    lrN)	__name__
__module____qualname____doc__r   r   r   float__annotations__ r   r   O/home/ubuntu/.local/lib/python3.10/site-packages/nemo/core/config/optimizers.pyr
   #   s   
 r
   c                   @   sB   e Zd ZU dZdZeed< dZeed< dZeed< dZ	e
ed< dS )	r   aN  
    Default configuration for Adam optimizer.
    It is not derived from Config as it is not a NeMo object (and in particular it doesn't need a name).

    ..note:
        For the details on the function/meanings of the arguments, please refer to:
        https://pytorch.org/docs/stable/optim.html?highlight=sgd#torch.optim.SGD
    r   momentum	dampeningweight_decayFnesterovN)r   r   r   r   r   r   r   r   r   r    boolr   r   r   r   r   -      
 	r   c                   @   s6   e Zd ZU dZdZeed< dZeed< dZe	ed< dS )	r   aP  
    Default configuration for Adam optimizer.
    It is not derived from Config as it is not a NeMo object (and in particular it doesn't need a name).

    ..note:
        For the details on the function/meanings of the arguments, please refer to:
        https://pytorch.org/docs/stable/optim.html?highlight=adam#torch.optim.Adam
    :0yE>epsr   r   FamsgradN)
r   r   r   r   r$   r   r   r   r%   r!   r   r   r   r   r   >   s
   
 
r   c                   @   sJ   e Zd ZU dZdZeeef ed< dZeed< dZ	eed< dZ
eed	< d
S )r   aC  
    Default configuration for AdamW optimizer.
    It is not derived from Config as it is not a NeMo object (and in particular it doesn't need a name).

    ..note:
        For the details on the function/meanings of the arguments, please refer to:
        https://pytorch.org/docs/stable/optim.html#torch.optim.AdamW
    ?g+?betasr#   r$   r   r   Fr%   N)r   r   r   r   r(   r   r   r   r$   r   r%   r!   r   r   r   r   r   O   s   
 	r   c                   @   s6   e Zd ZU dZdZeed< dZeed< dZeed< dS )	r   aI  
    Default configuration for Adadelta optimizer.
    It is not derived from Config as it is not a NeMo object (and in particular it doesn't need a name).

    ..note:
        For the details on the function/meanings of the arguments, please refer to:
        https://pytorch.org/docs/stable/optim.html#torch.optim.Adadelta
    r'   rhoư>r$   r   r   N)	r   r   r   r   r)   r   r   r$   r   r   r   r   r   r   `   s
   
 	r   c                   @   s>   e Zd ZU dZdZeeef ed< dZeed< dZ	eed< dS )	r   aE  
    Default configuration for Adamax optimizer.
    It is not derived from Config as it is not a NeMo object (and in particular it doesn't need a name).

    ..note:
        For the details on the function/meanings of the arguments, please refer to:
        https://pytorch.org/docs/stable/optim.html#torch.optim.Adamax
    r&   r(   r#   r$   r   r   N)
r   r   r   r   r(   r   r   r   r$   r   r   r   r   r   r   p   s
   
 	r   c                   @   sB   e Zd ZU dZdZeed< dZeed< dZeed< dZ	eed< dS )	r   aG  
    Default configuration for Adagrad optimizer.
    It is not derived from Config as it is not a NeMo object (and in particular it doesn't need a name).

    ..note:
        For the details on the function/meanings of the arguments, please refer to:
        https://pytorch.org/docs/stable/optim.html#torch.optim.Adagrad
    r   lr_decayr   initial_accumulator_valueg|=r$   N)
r   r   r   r   r+   r   r   r   r,   r$   r   r   r   r   r      r"   r   c                   @   sN   e Zd ZU dZdZeed< dZeed< dZeed< dZ	eed< d	Z
eed
< dS )r   aG  
    Default configuration for RMSprop optimizer.
    It is not derived from Config as it is not a NeMo object (and in particular it doesn't need a name).

    ..note:
        For the details on the function/meanings of the arguments, please refer to:
        https://pytorch.org/docs/stable/optim.html#torch.optim.RMSprop
    gGz?alphar#   r$   r   r   r   FcenteredN)r   r   r   r   r-   r   r   r$   r   r   r.   r!   r   r   r   r   r      s   
 	r   c                   @   s:   e Zd ZU dZdZeeef ed< dZeeef ed< dS )r   aI  
    Default configuration for RpropParams optimizer.
    It is not derived from Config as it is not a NeMo object (and in particular it doesn't need a name).

    ..note:
        For the details on the function/meanings of the arguments, please refer to:
        https://pytorch.org/docs/stable/optim.html#torch.optim.Rprop
    )g      ?g333333?etas)r*   2   
step_sizesN)	r   r   r   r   r/   r   r   r   r1   r   r   r   r   r      s   
 	r   c                   @   sz   e Zd ZU dZdZeeef ed< dZeed< dZ	eed< dZ
eed	< dZeed
< dZeed< dZeed< dZeed< dS )r   a"  
    Configuration of the Novograd optimizer.

    It has been proposed  in "Stochastic Gradient Methods with Layer-wise
    Adaptive Moments for Training of Deep Networks"
    (https://arxiv.org/abs/1905.11286)

    Args:
        lr (float, optional): learning rate (default: 1e-3)
        betas (Tuple[float, float], optional): coefficients used for computing
            running averages of gradient and its square (default: (0.9, 0.999))
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        amsgrad (boolean, optional): whether to use the AMSGrad variant of this
            algorithm from the paper "On the Convergence of Adam and Beyond"
    )gffffff?g\(\?r(   r#   r$   r   r   Fgrad_averagingr%   lucMbP?	luc_trustluc_epsN)r   r   r   r   r(   r   r   r   r$   r   r2   r!   r%   r3   r5   r6   r   r   r   r   r      s   
 r   c                   @   sz   e Zd ZU dZdZeed< dZeeef ed< dZ	eed< dZ
eed	< d
Zeed< dZeed< dZeed< dZeed< dS )AdafactorParamsa  
    Configuration of the Adafactor optimizer.

    It has been proposed  in "Adafactor: Adaptive Learning Rates with Sublinear Memory Cost"
    (https://arxiv.org/abs/1804.04235)

    Args:
        lr (float, optional): learning rate (default: 1e-3)
        beta1 (float, optional): coefficients used for computing
            running averages of gradient and its square (default: None)
        eps (Tuple [float, float] optional)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        scale_parameter (float, optional): scale parameter (default: False)
        relative_step (bool, optional): whether to use relative step sizes (default: False)
        warmup_init (bool, optional): whether to warmup the learning rate linearly (default: False)
    Nbeta1)gKH9r4   r$   g      ?clip_thresholdg?
decay_rater   r   Tscale_parameterFrelative_stepwarmup_init)r   r   r   r   r8   r   r   r$   r   r9   r:   r   r;   r!   r<   r=   r   r   r   r   r7      s   
 r7   nameoptimizer_paramsc                 C   s"   | t v rtd|  |t | < dS )aF  
    Checks if the optimizer param name exists in the registry, and if it doesnt, adds it.

    This allows custom optimizer params to be added and called by name during instantiation.

    Args:
        name: Name of the optimizer. Will be used as key to retrieve the optimizer.
        optimizer_params: Optimizer class
    zFCannot override pre-existing optimizers. Conflicting optimizer name = N)AVAILABLE_OPTIMIZER_PARAMS
ValueError)r>   r?   r   r   r   register_optimizer_params   s   
rB   kwargsreturnc                 K   st   | du r|S | t vrtd|  dt   t |  }|dur0t|dkr0t|}t| | t|fi |}|S )aC  
    Convenience method to obtain a OptimizerParams class and partially instantiate it with optimizer kwargs.

    Args:
        name: Name of the OptimizerParams in the registry.
        kwargs: Optional kwargs of the optimizer used during instantiation.

    Returns:
        a partially instantiated OptimizerParams
    Nz%Cannot resolve optimizer parameters 'z('. Available optimizer parameters are : r   )r@   rA   keyslenr	   createmerger   )r>   rC   scheduler_paramsr   r   r   get_optimizer_config   s   
rJ   )optim_paramsadam_paramsnovograd_params
sgd_paramsadadelta_paramsadamax_paramsadagrad_paramsadamw_paramsrmsprop_paramsrprop_paramsadafactor_paramsN)dataclassesr   	functoolsr   typingr   r   r   r   	omegaconfr   r	   __all__r
   r   r   r   r   r   r   r   r   r   r7   strrB   rJ   r@   r   r   r   r   <module>   sR   	"
