o
    }oih!                     @   s4  d dl Z d dlmZ d dlmZmZmZmZ d dlZd dl	Z	d dl
mZ d dlmZmZ d dl
mZmZmZmZmZ d dlmZ d dlmZmZmZ d dlmZ d d	lmZ d d
lmZ d dl m!Z! d dl"m#Z# ej$ej%ej&ej'ej(ej)ej*ej+eeedZ,zd dl-m.Z.m/Z/ dZ0e/e,d< e.e,d< W n e1y   dZ0Y nw dZ2e0rzd dl3m4Z4 dZ2e4e,d< W n e5e1fy   dZ2Y nw zd dl6m7Z7 e7e,d< W n e5e1fy   Y nw g dZ8de9deeee9ef f deee9ef ef fddZ:de9dedefd d!Z;de9d"eee9ef  defd#d$Z<defd%d&Z=dS )'    N)partial)AnyDictOptionalUnion)
DictConfig	OmegaConf)adadeltaadagradadamaxrmsproprprop)	Optimizer)OptimizerParamsget_optimizer_configregister_optimizer_params)	Adafactor)Adan)Novograd)logging)maybe_update_config_version)sgdadamadamwr	   r   r
   r   r   novograd	adafactoradan)	FusedAdam	FusedLAMBTlamb
fused_adamF)MegatronDistributedFusedAdamdistributed_fused_adam)MegatronFusedAdammegatron_fused_adam)get_optimizerregister_optimizerparse_optimizer_argsoptimizer_nameoptimizer_kwargsreturnc           	      C   s  i }|du r|S t |}t|}t|trtj|dd}t|drd|v r7t|}t	j
|}t|}|S d|v r|d dkrLd| }|d n|d}d	|v r[|d	}n|}t|tritj|dd}t|fi |}|du r{t|}|S | }t|}|S |S |S )
a  
    Parses a list of strings, of the format "key=value" or "key2=val1,val2,..."
    into a dictionary of type {key=value, key2=[val1, val2], ...}

    This dictionary is then used to instantiate the chosen Optimizer.

    Args:
        optimizer_name: string name of the optimizer, used for auto resolution of params
        optimizer_kwargs: Either a list of strings in a specified format,
            or a dictionary. If a dictionary is provided, it is assumed the dictionary
            is the final parsed value, and simply returned.
            If a list of strings is provided, each item in the list is parsed into a
            new dictionary.

    Returns:
        A dictionary
    NT)resolvekeys_target_nameauto	{}_paramsparams)copydeepcopyr   
isinstancer   r   to_containerhasattrcreatehydrautilsinstantiatevarsformatpopgetr   )	r(   r)   kwargsoptimizer_kwargs_configoptimizer_instanceoptimizer_params_nameoptimizer_params_overrideoptimizer_params_clsoptimizer_params rF   N/home/ubuntu/.local/lib/python3.10/site-packages/nemo/core/optim/optimizers.pyr'   P   s@   






r'   r.   	optimizerrE   c                 C   s:   | t v rtd|  |t | < d|j}t||d dS )a|  
    Checks if the optimizer name exists in the registry, and if it doesnt, adds it.

    This allows custom optimizers to be added and called by name during instantiation.

    Args:
        name: Name of the optimizer. Will be used as key to retrieve the optimizer.
        optimizer: Optimizer class
        optimizer_params: The parameters as a dataclass of the optimizer
    zFCannot override pre-existing optimizers. Conflicting optimizer name = r0   )r.   rE   N)AVAILABLE_OPTIMIZERS
ValueErrorr<   __name__r   )r.   rH   rE   
optim_namerF   rF   rG   r&      s
   r&   r?   c                 K   sV   | t vrtd|  dt   | dkrtj stdt |  }t|fi |}|S )a2  
    Convenience method to obtain an Optimizer class and partially instantiate it with optimizer kwargs.

    Args:
        name: Name of the Optimizer in the registry.
        kwargs: Optional kwargs of the optimizer used during instantiation.

    Returns:
        a partially instantiated Optimizer
    zCannot resolve optimizer 'z'. Available optimizers are : r    z)CUDA must be available to use fused_adam.)rI   rJ   r,   torchcudais_availabler   )r.   r?   rH   rF   rF   rG   r%      s   
r%   c                 C   s   t jt jf}tr|tf7 }t| |rO| jD ];}|d D ]2}| j| }t|dkrMt	j
|jt	jd|d< t	j
|jt	jd|d< |drMt	j
|t	jd|d< qqd S d S )Nr1   r   )memory_formatexp_avg
exp_avg_sqamsgradmax_exp_avg_sq)optimAdamAdamW	HAVE_APEXr   r4   param_groupsstatelenrM   
zeros_likedatapreserve_formatr>   )rH   adam_nondist_optimsgroupprZ   rF   rF   rG   init_optimizer_states   s    




rb   )>r2   	functoolsr   typingr   r   r   r   r8   rM   torch.optimrU   	omegaconfr   r   r	   r
   r   r   r   torch.optim.optimizerr   nemo.core.configr   r   r   nemo.core.optim.adafactorr   nemo.core.optim.adanr   nemo.core.optim.novogradr   
nemo.utilsr   nemo.utils.model_utilsr   SGDrV   rW   AdadeltaAdamaxAdagradRMSpropRproprI   apex.optimizersr   r   rX   ModuleNotFoundErrorHAVE_APEX_DISTRIBUTED_ADAM nemo.core.optim.distributed_adamr!   ImportError#nemo.core.optim.megatron_fused_adamr#   __all__strr'   r&   r%   rb   rF   rF   rF   rG   <module>   sz   
Q"