o
    }oi                     @   s   d dl Z d dlmZmZ d dlmZ d dlmZmZ d dl	m
Z d dlmZ d dlmZ d dlmZ d dlmZ G d	d
 d
ejeeeZG dd dejeeeZdS )    N)ABCabstractmethod)deepcopy)ListOptional)OptimizerLRScheduler)	Optimizer)IOMixin)CallbackMethodsc                   @   s4   e Zd ZdZd
ddZedefddZdd	 ZdS )LRSchedulerModulea/  A module to standardize the learning rate scheduler setup and configuration.

    This class decouples the learning rate scheduler from the model, similar to how the LightningDataModule
    decouples data handling. It also acts as a Callback to hook into the training loop, which can be useful
    for adding custom all-reduces, logging, early stopping, etc. Next to that standard Lightning callback-event,
    this also supports hooking into the Megatron forward-backward function at a granular level.

    Example::

        class MyLRSchedulerModule(LRSchedulerModule):
            def setup(self, model, optimizer):
                # Custom setup logic
                ...

            def scheduler(self, model, optimizers):
                # Define and return the learning rate scheduler
                ...

    Methods:
        setup(model, optimizer): Sets up the learning rate scheduler.
        scheduler(model, optimizers): Abstract method to define the learning rate scheduler.
        __call__(model, optimizers): Calls the setup and scheduler methods.
    returnNc                 C   s   dS )zSets up the learning rate scheduler.

        Args:
            model: The model for which the scheduler is being set up.
            optimizer: The optimizer for which the scheduler is being set up.
        N )selfmodel	optimizerr   r   U/home/ubuntu/.local/lib/python3.10/site-packages/nemo/lightning/pytorch/optim/base.pyconnect5   s   zLRSchedulerModule.connectc                 C      t d)a7  Abstract method to define the learning rate scheduler.

        Args:
            model: The model for which the scheduler is being defined.
            optimizers: The optimizers for which the scheduler is being defined.

        Returns:
            OptimizerLRScheduler: The learning rate scheduler.
        z9The scheduler method should be implemented by subclasses.NotImplementedErrorr   r   
optimizersr   r   r   	scheduler>   s   zLRSchedulerModule.schedulerc                 C   s:   |  || | ||| _t| jttfs|| jfS | jS )a%  Calls the setup and scheduler methods.

        Args:
            model: The model for which the scheduler is being called.
            optimizers: The optimizers for which the scheduler is being called.

        Returns:
            OptimizerLRScheduler: The learning rate scheduler.
        )r   r   
_scheduler
isinstancedicttupler   r   r   r   __call__K   s
   
zLRSchedulerModule.__call__r   N)	__name__
__module____qualname____doc__r   r   r   r   r   r   r   r   r   r      s    
	r   c                   @   sl   e Zd ZdZdee fddZdejddfdd	Z	e
dee fd
dZdddZddejdefddZdS )OptimizerModulea  A module to standardize the optimizer setup and configuration.

    This class decouples the optimizer from the model, similar to how the LightningDataModule
    decouples data handling. It also acts as a Callback to hook into the training loop, which can be useful
    for adding custom all-reduces, logging, early stopping, etc. Next to that standard Lightning callback-event,
    this also supports hooking into the Megatron forward-backward function at a granular level.

    Attributes:
        lr_scheduler (Optional[LRSchedulerModule]): The learning rate scheduler module.

    Example::

        class MyOptimizerModule(OptimizerModule):
            def __init__(self, lr_scheduler=None):
                super().__init__(lr_scheduler)

            def setup(self, model):
                # Custom setup logic
                ...

            def optimizers(self, model):
                # Define and return the optimizers
                ...

    Methods:
        connect(model, trainer): Connects the optimizer module to the model and trainer.
        setup(model): Sets up the optimizer.
        optimizers(model): Abstract method to define the optimizers.
        __call__(model, megatron_parallel): Calls the setup and optimizers methods.
    lr_schedulerc                 C   s
   || _ dS )zInitializes the OptimizerModule.

        Args:
            lr_scheduler (Optional[LRSchedulerModule]): The learning rate scheduler module.
        N)r$   )r   r$   r   r   r   __init__   s   
zOptimizerModule.__init__r   r   Nc                    s`   d fdd	}t |||_ |_t dr*t|dr,t|jdr.t j|j_dS dS dS dS )zConnects the optimizer module to the model and trainer.

        Args:
            model (L.LightningModule): The model to which the optimizer module is being connected.
        Nc                    s    | |d}|S )N)megatron_parallelr   )lightning_module_selfr&   optr   r   r   custom_configure_optimizers   s   z<OptimizerModule.connect.<locals>.custom_configure_optimizers__io__optimN)types
MethodTypeconfigure_optimizersr,   hasattrr+   r   )r   r   r*   r   r)   r   r      s   zOptimizerModule.connectc                 C   r   )zAbstract method to define the optimizers.

        Args:
            model: The model for which the optimizers are being defined.

        Returns:
            List[Optimizer]: The list of optimizers.
        z:The optimizers method should be implemented by subclasses.r   )r   r   r   r   r   r      s   
zOptimizerModule.optimizersc                 C   sR   | j d ur't| j d jdkr| j d jd d }nd}|jd|ddd d S d S )Nr   lrg           T)
batch_sizeprog_bar)_optimizerslenparam_groupslog)r   trainer	pl_modulebatch	batch_idxr2   r   r   r   on_train_batch_start   s   
z$OptimizerModule.on_train_batch_startc                 C   s   |du r|n|}|j j}| |vr||  | jdur%| j|vr%|| j | || _t| jdkr7| jd n| j}| jdurG| ||}|S | jS )aB  Calls the setup and optimizers methods.

        Args:
            model (L.LightningModule): The model for which the optimizers are being called.
            megatron_parallel: Optional parallel model.

        Returns:
            OptimizerLRScheduler: The optimizers and optionally the learning rate scheduler.
        Nr3   r   )r:   	callbacksappendr$   r   r6   r7   )r   r   r&   _modelr?   _optwith_schedulerr   r   r   r      s   


zOptimizerModule.__call__r   r-   )r   r    r!   r"   r   r   r%   LLightningModuler   r   r   r   r   r>   r   r   r   r   r   r   r#   `   s    
	r#   )r.   abcr   r   copyr   typingr   r   lightning.pytorchpytorchrD   !lightning.pytorch.utilities.typesr   torch.optimr   nemo.lightning.io.mixinr	    nemo.lightning.megatron_parallelr
   Callbackr   r#   r   r   r   r   <module>   s   D