o
    }oi                      @   s  d dl mZmZmZ d dlmZ d dlZd dl	Z	d dl
mZ d dlmZ d dlmZ d dlmZmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ erZd dlmZm Z  eZ!						d-dej"ej# de$dee$ de$de%de%de&dee! dej'fddZ(dde	j)dddddddddfddZ*				d.dee$ de$deej"d   d!eej"d"  dej"ej+ f
d#d$Z,d%e$dej"ej- fd&d'Z.ej/j0d(d)dej"e fd*d(Z1ej/j0d+d)dej"e fd,d+Z2dS )/    )TYPE_CHECKINGAnyOptionalN)llm)PackedSequenceSpecs)DoRALoRA)tensorboard_logger),distributed_fused_adam_with_cosine_annealing)
bf16_mixed)PEFT)TimingCallback)TensorBoardLoggerWandbLoggerdefault      Fmodelresume_pathdirname	num_nodesnum_gpus_per_nodepacked_sequence	tokenizerreturnc           
      C   s~   |rt jtjdddtddd}n
t jtjdddd}t jtj| t||d|t||t	|d	d
t
dddddt||d}	|	S )a  
    Create a default fine-tuning recipe for any model.

    This function sets up a template for a complete configuration for fine-tuning, including
    model, trainer, data, logging, optimization, and resumption settings.

    Args:
        model (run.Config[pl.LightningModule]): Configuration for a NeMo model.
        resume_path (str): Path to the Huggingface model or pretrained distributed checkpoint for resume
        dir (Optional[str]): Directory for saving logs and checkpoints.
        name (str): Name of the fine-tuning run.
        num_nodes (int): Number of compute nodes to use.
        num_gpus_per_node (int): Number of GPUs per node.
        packed_sequence (bool): Whether to use packed sequence.
        tokenizer (Optional[TokenizerType]): Tokenizer setting to be applied. Can be 'data' or 'model'
            or an instance of TokenizerSpec.

    Returns:
        run.Partial: Partial configuration for fine-tuning.

    See usages of this recipe for further details.
    i   r   r   )packed_sequence_size)
seq_lengthglobal_batch_sizemicro_batch_sizepacked_sequence_specs   )r   r   r   )r   r   r   )r   r   r	   g-C6?r   2   g\(\?)max_lrmin_lrwarmup_steps
adam_beta2)r   trainerdatalogoptimresumer   )runConfigr   SquadDataModuler   Partialfinetunedefault_finetune_trainerdefault_finetune_logr	   r
   nemo_resume)
r   r   r   r   r   r   r   r   
datamodulerecipe r7   a/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/llm/recipes/finetune_default.pydefault_finetune_recipe%   s.    r9   i     c                 C   sT   t jtj| |||||ddd	}t jtjdd||	|
d||t |d|t tgd}|S )a;  
    Create a default fine-tuning trainer for any model.

    This function sets up a template for strategy and trainer.

    Args:
        See docstrings of MegatronStrategy and Trainer.

    Returns:
        run.Config: Config for a finetuning trainer.

    See usages of this in recipes for further details.
    Tlog_all)tensor_model_parallel_sizepipeline_model_parallel_sizepipeline_dtype$virtual_pipeline_model_parallel_sizecontext_parallel_sizesequence_parallelgradient_as_bucket_viewckpt_load_strictnessgpur   F)acceleratoraccumulate_grad_batchesdeviceslimit_test_batcheslimit_val_batcheslog_every_n_steps	max_stepsr   pluginsstrategyuse_distributed_samplerval_check_interval	callbacks)r-   r.   nlMegatronStrategyTrainerr   r   )tensor_parallelismpipeline_parallelismpipeline_parallelism_typevirtual_pipeline_parallelismcontext_parallelismsequence_parallelismr   r   rK   rH   rI   rO   rM   r(   r7   r7   r8   r2   `   s8   
r2   r	   r   wandb_loggerr   c                 C   s.   t jtjddddd}t jtj||||| dS )aI  
    Create a default fine-tuning logger for any model.

    This function sets up a template for ModelCheckpoint and NeMoLogger.

    Args:
        See docstrings of ModelCheckpoint and NeMoLogger.

    Returns:
        run.Config: Config for a finetuning NeMoLogger.

    See usages of this in recipes for further details.
    link   r#   z6{model_name}--{val_loss:.2f}-{step}-{consumed_samples})	save_last
save_top_kevery_n_train_stepsfilename)ckptr   tensorboardwandblog_dir)r-   r.   rQ   ModelCheckpoint
NeMoLogger)r   r   r	   rZ   ra   r7   r7   r8   r3      s   r3   model_idc                 C   s"   t jtjt jtjd|  ddS )a  
    Configure automatic resumption from a NeMo checkpoint converted from Huggingface for
    https://huggingface.co/{model_id}.

    This NeMo checkpoint should be converted from Huggingface beforehand, using nemo.collections.llm.import_ckpt.
    When converting the checkpoint, the NeMo checkpoint will be saved in NEMO_HOME (set to ~/.cache/nemo by default).

    This function sets up the configuration to resume training from path nemo://{model_id}.
    This translates to the full path {NEMO_HOME}/models/{model_id}.

    Args:
        model_id (str): Path to the Huggingface model or pretrained distributed checkpoint for resume

    Returns:
        run.Config[nl.AutoResume]: Configuration for resuming from NeMo checkpoint.
    znemo://)path)restore_config)r-   r.   rQ   
AutoResumeRestoreConfig)rg   r7   r7   r8   r4      s   r4   lorar"   c                   C   
   t tS )a@  
    Factory function to create a LoRA configuration.

    Returns:
        run.Config[PEFT]: Configuration for the LoRA class.

    Examples:
        CLI usage:
            $ nemo llm finetune -f llama3_8b peft=lora

        Python API usage:
            >>> lora_config = lora()
            >>> print(lora_config)
    )r-   r.   r   r7   r7   r7   r8   rl         
dorac                   C   rm   )a@  
    Factory function to create a DoRA configuration.

    Returns:
        run.Config[PEFT]: Configuration for the DoRA class.

    Examples:
        CLI usage:
            $ nemo llm finetune -f llama3_8b peft=dora

        Python API usage:
            >>> dora_config = dora()
            >>> print(dora_config)
    )r-   r.   r   r7   r7   r7   r8   ro      rn   )Nr   r   r   Fr   )Nr   NN)3typingr   r   r   lightning.pytorchpytorchplnemo_runr-   torchnemo.lightning	lightningrQ   nemo.collectionsr   -nemo.collections.llm.gpt.data.packed_sequencer   nemo.collections.llm.peftr   r   (nemo.collections.llm.recipes.log.defaultr	   'nemo.collections.llm.recipes.optim.adamr
   6nemo.collections.llm.recipes.precision.mixed_precisionr    nemo.lightning.pytorch.callbacksr   nemo.utils.exp_managerr   lightning.pytorch.loggersr   r   TokenizerTyper.   LightningModulestrintboolr0   r9   bfloat16r2   rf   r3   rj   r4   clifactoryrl   ro   r7   r7   r7   r8   <module>   s   
	
<
<

&