from typing import Optional

import lightning.pytorch as pl
import nemo_run as run
import torch
from lightning.pytorch.callbacks.callback import Callback
from megatron.core.distributed import DistributedDataParallelConfig
from megatron.core.optimizer import OptimizerConfig

from nemo import lightning as nl
from nemo.collections.llm.api import finetune, pretrain
from nemo.collections.llm.peft import PEFT_STR2CLS
from nemo.collections.llm.recipes.finetune_default import default_finetune_trainer, nemo_resume
from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger
from nemo.collections.llm.recipes.precision.mixed_precision import bf16_mixed
from nemo.collections.llm.t5.data.mock import MockDataModule
from nemo.collections.llm.t5.data.squad import SquadDataModule
from nemo.collections.llm.t5.model.t5 import T5Config11B, T5Model
from nemo.lightning.pytorch.optim.lr_scheduler import WarmupAnnealingScheduler
from nemo.lightning.pytorch.optim.megatron import MegatronOptimizerModule
from nemo.utils.exp_manager import TimingCallback

NAME = "t5_11b"


@run.cli.factory(name=NAME)
def model() -> run.Config[pl.LightningModule]:
    """
    Factory function to create a T5 11B model configuration.

    Returns:
        run.Config[pl.LightningModule]: Configuration for the T5 11B model.

    Examples:
        CLI usage:
            $ nemo llm pretrain model=t5_11b ...

        Python API usage:
            >>> model_config = model()
            >>> print(model_config)
    """
    return run.Config(T5Model, config=run.Config(T5Config11B))


def trainer(
    tensor_parallelism: int = 4,
    pipeline_parallelism: int = 1,
    pipeline_parallelism_type: Optional[torch.dtype] = None,
    virtual_pipeline_parallelism: Optional[int] = None,
    context_parallelism: int = 1,
    sequence_parallelism: bool = False,
    num_nodes: int = 20,
    num_gpus_per_node: int = 8,
    max_steps: int = 1000000,
    callbacks: Optional[list[run.Config[Callback]]] = None,
) -> run.Config[nl.Trainer]:
    """
    Configure the NeMo Lightning Trainer for the T5 model.

    This function sets up the distributed training strategy and other training parameters.

    Args:
        tensor_parallelism (int): Degree of tensor model parallelism.
        pipeline_parallelism (int): Degree of pipeline model parallelism.
        pipeline_parallelism_type (Optional[torch.dtype]): Data type for pipeline parallelism.
        virtual_pipeline_parallelism (Optional[int]): Size of virtual pipeline parallelism.
        context_parallelism (int): Degree of context parallelism.
        sequence_parallelism (bool): Whether to use sequence parallelism.
        num_nodes (int): Number of compute nodes to use.
        num_gpus_per_node (int): Number of GPUs per node.
        max_steps (int): Maximum number of training steps.
        callbacks (Optional[list[run.Config[Callback]]]): List of callback configurations.

    Returns:
        run.Config[nl.Trainer]: Configuration for the NeMo Lightning Trainer.

    Examples:
        CLI usage:
            $ nemo llm pretrain trainer=t5_11b ...

        Python API usage:
            >>> trainer_config = trainer(num_nodes=2, num_gpus_per_node=8)
            >>> print(trainer_config)

    Note:
        For more information on distributed training strategies, refer to the
        NeMo documentation on multi-GPU and multi-node training.
    T)check_for_nan_in_gradgrad_reduce_in_fp32overlap_grad_reduceoverlap_param_gather)
tensor_model_parallel_sizepipeline_model_parallel_sizepipeline_dtype$virtual_pipeline_model_parallel_sizecontext_parallel_sizesequence_parallelgradient_as_bucket_viewckpt_async_saveckpt_parallel_loadddpgpur"   2       
   F  )acceleratoraccumulate_grad_batchesr/   deviceslimit_test_batcheslimit_val_batcheslog_every_n_stepsr.   r,   pluginsstrategyuse_distributed_samplerval_check_interval)r   r   nlMegatronStrategyr   Trainerr   )r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   rJ   trainerr   r   r   rP   <   sH   ,rP   )targetr   defaultdirr   c                 C   s   t jtdddddd}t jtddddd	}t j|t t||t tgd
t jtdddddt	| |t
|ddt jt||dt dS )a|  


@run.cli.factory(target=pretrain, name=NAME)
def pretrain_recipe(
    dir: Optional[str] = None, name: str = "default", num_nodes: int = 20, num_gpus_per_node: int = 8, fn=pretrain
) -> run.Partial:
    """
    Create a pre-training recipe for the T5 11B model.

    This function sets up a complete configuration for pre-training, including
    model, trainer, data, logging, optimization, and resumption settings.

    Args:
        dir (Optional[str]): Directory for saving logs and checkpoints.
        name (str): Name of the pre-training run.
        num_nodes (int): Number of compute nodes to use.
        num_gpus_per_node (int): Number of GPUs per node.
        fn (Callable): The pre-training function to use.

    Returns:
        run.Partial: Partial configuration for pre-training.

    Examples:
        CLI usage:
            $ nemo llm pretrain --factory t5_11b
            $ nemo llm pretrain --factory "t5_11b(num_nodes=2, name='my_pretrain')"

        Python API usage:
            >>> recipe = pretrain_recipe(name="t5_11b_pretrain", num_nodes=2)
            >>> print(recipe)
    adam-C6?T{Gz?	optimizerlruse_distributed_optimizerbf16weight_decayNr%   h㈵>)warmup_stepswarmup_ratior.   min_lr)r,   r-   r/         i     
seq_lengthseq_length_decglobal_batch_sizemicro_batch_sizer   rS   r   r   r   lr_schedulerr    rP   datalogoptimresume)r   r   r   r   Partialr    rP   r   r   r   r   r   r   )rS   r   r,   r-   fn
opt_configrk   r   r   r   pretrain_recipe   s<   	

rt    loracheckpoint_pathpeft_schemec           	      C   s   t jtdddddd}t jtdddd	}t jtt t||d
t jtdddddt	||t
|ddt jt||dt| d}|du sG| dkrSd|jj_d|jj_|S | dv rjt t|  |_d|jj_|S td| )ah  


@run.cli.factory(target=finetune, name=NAME)
def finetune_recipe(
    checkpoint_path: str = "",
    dir: Optional[str] = None,
    name: str = "default",
    num_nodes: int = 1,
    num_gpus_per_node: int = 8,
    peft_scheme: Optional[str] = 'lora',
) -> run.Partial:
    """
    Create a fine-tuning recipe for the T5 11B model.

    This function sets up a complete configuration for fine-tuning, including
    model, trainer, data, logging, optimization, and resumption settings.
    The recipe uses LoRA (Low-Rank Adaptation) for efficient fine-tuning, unless peft_scheme is set to None.

    Args:
        checkpoint_path (str): Path to the pretrained checkpoint.
        dir (Optional[str]): Directory for saving logs and checkpoints.
        name (str): Name of the fine-tuning run.
        num_nodes (int): Number of compute nodes to use.
        num_gpus_per_node (int): Number of GPUs per node.
        peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning.
            Allowed values: 'lora'/'dora'/'none'/None.

    Returns:
        run.Partial: Partial configuration for fine-tuning.

    Examples:
        CLI usage:
            $ nemo llm finetune --factory t5_11b

        Python API usage:
            >>> recipe = finetune_recipe(name="t5_11b_finetune", num_nodes=1)
            >>> print(recipe)

    Note:
        This recipe uses the SQuAD dataset for fine-tuning.
    """
    opt_config = run.Config(
        OptimizerConfig,
        optimizer='adam',
        lr=0.0001,
        use_distributed_optimizer=True,
        bf16=True,
        weight_decay=0.01,
    )
    lr_scheduler = run.Config(
        WarmupAnnealingScheduler,
        warmup_steps=50,
        max_steps=2000,
        min_lr=0.00001,
    )

    recipe = run.Partial(
        finetune,
        model=model(),
        trainer=default_finetune_trainer(
            num_nodes=num_nodes,
            num_gpus_per_node=num_gpus_per_node,
        ),
        data=run.Config(
            SquadDataModule, seq_length=512, seq_length_dec=128, global_batch_size=128, micro_batch_size=1
        ),
        log=default_log(dir=dir, name=name, tensorboard_logger=tensorboard_logger(name=name)),
        optim=run.Config(MegatronOptimizerModule, config=opt_config, lr_scheduler=lr_scheduler),
        resume=nemo_resume(checkpoint_path),
    )

    if peft_scheme is None or peft_scheme.lower() == 'none':
        # Full-parameter fine-tuning: shard the 11B model across 4 GPUs and use a lower LR.
        recipe.trainer.strategy.tensor_model_parallel_size = 4
        recipe.optim.config.lr = 5e-6
    elif peft_scheme.lower() in ['lora', 'dora']:
        recipe.peft = run.Config(PEFT_STR2CLS[peft_scheme.lower()])
        recipe.optim.config.lr = 1e-4
    else:
        raise ValueError(f"Unrecognized peft scheme: {peft_scheme}")

    return recipe
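

# Minimal launch sketch (illustrative, not part of the recipe module): the factories above
# are plain callables, so a recipe can be built in Python and handed to nemo_run directly.
# Assumes a single node with 8 GPUs purely for demonstration; a real 11B pre-training run
# needs far more resources.
if __name__ == "__main__":
    recipe = pretrain_recipe(name="t5_11b_pretrain", num_nodes=1, num_gpus_per_node=8)
    executor = run.LocalExecutor(ntasks_per_node=8, launcher="torchrun")
    run.run(recipe, executor=executor)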