o
    }oi                     @   s   d dl mZmZmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZmZ dd Zdd	d
ZG dd deZdS )    )CallableListOptionalN)	Optimizer)ParamsT)MegatronParallel)LRSchedulerModuleOptimizerModulec                 C   s   d| v S )Nbias )
param_nameparamr   r   X/home/ubuntu/.local/lib/python3.10/site-packages/nemo/lightning/pytorch/optim/pytorch.py_param_does_not_have_wd   s   r   c                 C   s   g g }}|d ur'|   D ]\}}|jsq|||r || q|| qnttdd |  }ttt||fdksAJ ddd t	||f|dfD S )Nc                 S   s   | j S N)requires_grad)xr   r   r   <lambda>)   s    z1_extract_model_params_for_optim.<locals>.<lambda>r   z+Expected at least one optimizer with paramsc                 S   s   g | ]	\}}||d qS ))paramsweight_decayr   ).0r   wdr   r   r   
<listcomp>-   s    z3_extract_model_params_for_optim.<locals>.<listcomp>)
named_parametersr   appendlistfilter
parametersmaxmaplenzip)modelr   no_weight_decay_condparams_with_wdparams_without_wdnamer   r   r   r   _extract_model_params_for_optim   s   

r'   c                       s   e Zd ZdZdeddfdeegef dee	 dee dee de
f
 fd	d
ZdddZdee fddZdejddfddZ  ZS )PytorchOptimizerModulea  A OptimizerModule for pytorch optimizers.

    Attributes:
        optimizer_fn (Callable[[ParamsT], Optimizer]): Configuration for the optimizer.
        no_weight_decay_cond (Optional[Callable]): Condition for no weight decay.
        scale_lr_cond (Optional[Callable]): Condition for scaling learning rate.
        lr_mult (float): Learning rate multiplier.

    Example::

        optimizer_fn = run.Partial(
            SGD,
            lr=lr,
            weight_decay=wd,
        )
        lr_scheduler = MyLRSchedulerModule(...)
        optimizer_module = PytorchOptimizerModule(optimizer_fn, lr_scheduler)

    Methods:
        setup(model): Sets up the optimizer.
        optimizers(model): Defines the optimizers.
    Ng      ?optimizer_fnlr_schedulerr#   scale_lr_condlr_multc                    s*   t  j|d || _|| _|| _|| _dS )a  Initializes the PytorchOptimizerModule.

        Args:
            optimizer_fn (Callable[[ParamsT], Optimizer]): Configuration for the optimizer.
            lr_scheduler (Optional[LRSchedulerModule]): The learning rate scheduler module.
            no_weight_decay_cond (Optional[Callable]): Condition for no weight decay.
            scale_lr_cond (Optional[Callable]): Condition for scaling learning rate.
            lr_mult (float): Learning rate multiplier.
        )r*   N)super__init__r)   r#   r+   r,   )selfr)   r*   r#   r+   r,   	__class__r   r   r.   K   s
   
zPytorchOptimizerModule.__init__trainer
pl.Trainer	pl_modulepl.LightningModulec                 C   s   dS )nooopNr   )r/   r2   r4   r   r   r   on_fit_startc   s   z#PytorchOptimizerModule.on_fit_startreturnc                    sn   t  tr	tdjjdd}t |j}|_t |t	s&|g}j
du r-|S  fdd|D S )a%  Defines the optimizers.

        Args:
            model (nn.Module): The model for which the optimizers are being defined.

        Returns:
            List[Optimizer]: The list of optimizers.

        Raises:
            ValueError: If the model is an instance of MegatronParallel.
        z/Model cannot be an instance of MegatronParallelr   r   Nc                    s   g | ]	}j  |qS r   )r*   	scheduler)r   optr"   r/   r   r   r      s    z5PytorchOptimizerModule.optimizers.<locals>.<listcomp>)
isinstancer   
ValueErrorr)   keywordsgetr'   r#   _optimizersr   r*   )r/   r"   r   optimr   r;   r   
optimizersh   s   


z!PytorchOptimizerModule.optimizersr"   c                    s    fdd _ dS )zConnects the optimizer module to the model.

        Args:
            model (L.LightningModule): The model to which the optimizer module is being connected.
        c                      s
     S r   )rB   r   r;   r   r   r      s   
 z0PytorchOptimizerModule.connect.<locals>.<lambda>N)configure_optimizers)r/   r"   r   r;   r   connect   s   zPytorchOptimizerModule.connect)r2   r3   r4   r5   )__name__
__module____qualname____doc__r   r   r   r   r   r   floatr.   r7   r   rB   LLightningModulerD   __classcell__r   r   r0   r   r(   3   s(    
r(   )r   N)typingr   r   r   lightning.pytorchpytorchplrJ   torch.optimr   torch.optim.optimizerr    nemo.lightning.megatron_parallelr   !nemo.lightning.pytorch.optim.baser   r	   r   r'   r(   r   r   r   r   <module>   s   
