o
    }oi/                  (   @   sr  d dl mZ d dlmZ d dlZd dlZd dlm	Z	 d dl
mZ d dlmZmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZmZmZ d dlmZ d dlm Z m!Z! d dl"m#Z# dZ$ej%j&e$ddej'ej( fddZ)																dAde*de*deej+ d ee* d!e*d"e,d#e*d$e*d%e*d&e-d'e*d(e*d)e*d*e*d+e*d,ee.ej'e	   dej'ej/ f"d-d.Z0ej%j&ee$d/dd0ddej1dddddd1ddd2dddd3ddd4d5d d6d7efd8ee- d9e-de*de*deej+ d ee* d!e*d"e,d#e*d$e*d%e*d&e-d'e*d:e2d(e*d)e*d*e*d+e*dej3f&d;d<Z4ej%j&ee$d/		0			=dBd8ee- d9e-d#e*d$e*d>ee- dej3fd?d@Z5dS )C    )OptionalN)Callback)	lightning)finetunepretrain)MockDataModule)StarcoderConfig15BStarcoderModel)PEFT_STR2CLS)default_finetune_recipe)default_logdefault_resumetensorboard_logger),distributed_fused_adam_with_cosine_annealing)
bf16_mixed
fp16_mixed)TimingCallbackstarcoder_15bnamereturnc                   C   s   t jtt tdS )aj  
    Factory function to create a Starcoder 15B model configuration.

    Returns:
        run.Config[pl.LightningModule]: Configuration for the Starcoder 15B model.

    Examples:
        CLI usage:
            $ nemo llm pretrain model=starcoder_15b ...

        Python API usage:
            >>> model_config = model()
            >>> print(model_config)
    )config)runConfigr	   r    r   r   ^/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/llm/recipes/starcoder_15b.pymodel$   s   r            F   { 
bf16-mixed    
     tensor_parallelismpipeline_parallelismpipeline_parallelism_typevirtual_pipeline_parallelismcontext_parallelismsequence_parallelism	num_nodesnum_gpus_per_node	max_steps	precisionaccumulate_grad_batcheslimit_test_batcheslimit_val_batcheslog_every_n_stepsval_check_interval	callbacksc                 C   sp   t jtj| |||||ddddd}d}|	dkrt }n|	dkr"t }t jtjd|||
|||||||d|d}|S )	a  
    Configure the NeMo Lightning Trainer for Starcoder 15B models.

    This function sets up the distributed training strategy and other training parameters.

    Args:
        tensor_parallelism (int): Degree of tensor model parallelism.
        pipeline_parallelism (int): Degree of pipeline model parallelism.
        pipeline_parallelism_type (Optional[torch.dtype]): Data type for pipeline parallelism.
        virtual_pipeline_parallelism (Optional[int]): Size of virtual pipeline parallelism.
        context_parallelism (int): Degree of context parallelism.
        sequence_parallelism (bool): Whether to use sequence parallelism.
        num_nodes (int): Number of compute nodes to use.
        num_gpus_per_node (int): Number of GPUs per node.
        max_steps (int): Maximum number of training steps.
        precision (str): Precision configuration, one of fp32, 16-mixed or bf16-mixed.
        accumulate_grad_batches (int): Number of steps per gradient accumulation.
        limit_test_batches (int): Limit the number of test batches.
        limit_val_batches (int): Limit the number of validation batches.
        log_every_n_steps (int): Log every n steps.
        val_check_interval (int): Run validation every N steps.
        callbacks (Optional[list[run.Config[Callback]]]): List of callback configurations.

    Returns:
        run.Config[nl.Trainer]: Configuration for the NeMo Lightning Trainer.
    T)
tensor_model_parallel_sizepipeline_model_parallel_sizepipeline_dtype$virtual_pipeline_model_parallel_sizecontext_parallel_sizesequence_parallelgradient_as_bucket_viewckpt_include_optimizerckpt_async_saveckpt_parallel_loadNz16-mixedr"   gpuF)acceleratorr5   devicesr0   r1   r2   r3   r.   r,   pluginsstrategyuse_distributed_samplerr4   )r   r   nlMegatronStrategyr   r   Trainer)r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   rD   precision_plugintrainerr   r   r   starcoder_trainer8   sF   ,rK   )targetr   defaulti g      ?i  i   i  giUMu>ga2U0*3?dirr   gradient_clip_valc                 C   s   t j|t tdi d|d|d|d|d|d|d|d|	d	|
d
|d|d|d|d|d|dt tgt jt|||dt| |t|ddt	||||||dt
 dS )al	  
    Create a pre-training recipe for Starcoder 15B model.

    This function sets up a complete configuration for pre-training, including
    model, trainer, data, logging, optimization, and resumption settings.

    Args:
        dir (Optional[str]): Directory for saving logs and checkpoints.
        name (str): Name of the pre-training run.
        tensor_parallelism (int): Degree of tensor model parallelism.
        pipeline_parallelism (int): Degree of pipeline model parallelism.
        pipeline_parallelism_type (Optional[torch.dtype]): Data type for pipeline parallelism.
        virtual_pipeline_parallelism (Optional[int]): Size of virtual pipeline parallelism.
        context_parallelism (int): Degree of context parallelism.
        sequence_parallelism (bool): Whether to use sequence parallelism.
        num_nodes (int): Number of compute nodes to use.
        num_gpus_per_node (int): Number of GPUs per node.
        max_steps (int): Maximum number of training steps.
        precision (str): Precision configuration, one of fp32, 16-mixed or bf16-mixed.
        accumulate_grad_batches (int): Number of steps per gradient accumulation.
        gradient_clip_val (float): Value for gradient clipping.
        limit_test_batches (int): Limit the number of test batches.
        limit_val_batches (int): Limit the number of validation batches.
        log_every_n_steps (int): Log every n steps.
        val_check_interval (int): Run validation every N steps.
        global_batch_size (int): Global batch size.
        micro_batch_size (int): Micro batch size.
        seq_length (int): Sequence length.
        warmup_steps (int): Number of warmup steps.
        constant_steps (int): Number of constant steps.
        min_lr (float): Minimum learning rate.
        max_lr (float): Maximum learning rate.
        fn (Callable): The pre-training function to use.

    Returns:
        run.Partial: Partial configuration for pre-training.

    Examples:
        CLI usage:
            $ nemo llm pretrain --factory starcoder_15b
            $ nemo llm pretrain --factory "starcoder_15b(num_nodes=1, name='my_starcoder2_pretrain')"

        Python API usage:
            >>> recipe = pretrain_recipe(name="starcoder2_pretrain", num_nodes=1)
            >>> print(recipe)

    Note:
        This recipe uses a mock dataset, look for the finetune examples to see how to change the dataset.
    r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   )
seq_lengthglobal_batch_sizemicro_batch_sizer   )rN   r   r   )r/   warmup_stepsconstant_stepsmin_lrmax_lr	clip_grad)r   rJ   datalogoptimresumeNr   )r   Partialr   rK   r   r   r   r   r   r   r   )rN   r   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   rO   r1   r2   r3   r4   rQ   rR   rP   rS   rT   rU   rV   fnr   r   r   pretrain_recipe   sj   S	
r^   lorapeft_schemec                 C   s|   t t d| |||}|du s| dkr d|jj_d|jj_|S | dv r7t	
t|  |_d|jj_|S td| )	a@  
    Create a fine-tuning recipe for Starcoder 15B model.

    This function sets up a complete configuration for fine-tuning, including
    model, trainer, data, logging, optimization, and resumption settings.
    The recipe uses LoRA (Low-Rank Adaptation) for efficient fine-tuning, unless peft_scheme is set to None.

    Args:
        dir (Optional[str]): Directory for saving logs and checkpoints.
        name (str): Name of the fine-tuning run.
        num_nodes (int): Number of compute nodes to use.
        num_gpus_per_node (int): Number of GPUs per node.
        peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning.
            Allowed values: 'lora'/'dora'/'none'/None.

    Returns:
        run.Partial: Partial configuration for fine-tuning.

    Examples:
        CLI usage:
            $ nemo llm finetune --factory starcoder_15b

        Python API usage:
            >>> recipe = finetune_recipe(name="starcoder_15b_finetune", num_nodes=2)
            >>> print(recipe)

    Note:
        This recipe uses the SQuAD dataset for fine-tuning.
    zbigcode/starcoderNnoner    gh㈵>)r_   dorag-C6?zUnrecognized peft scheme: )r   r   lowerrJ   rD   r7   rZ   r   lrr   r   r
   peft
ValueError)rN   r   r,   r-   r`   reciper   r   r   finetune_recipe  s   %


rh   )r   r   NNr   Fr   r    r!   r"   r   r#   r#   r$   r%   N)NrM   r   r    r_   )6typingr   lightning.pytorchpytorchplnemo_runr   torch$lightning.pytorch.callbacks.callbackr   nemor   rF   nemo.collections.llm.apir   r   "nemo.collections.llm.gpt.data.mockr   (nemo.collections.llm.gpt.model.starcoderr   r	   nemo.collections.llm.peftr
   -nemo.collections.llm.recipes.finetune_defaultr   (nemo.collections.llm.recipes.log.defaultr   r   r   'nemo.collections.llm.recipes.optim.adamr   6nemo.collections.llm.recipes.precision.mixed_precisionr   r   nemo.utils.exp_managerr   NAMEclifactoryr   LightningModuler   intdtypeboolstrlistrH   rK   bfloat16floatr\   r^   rh   r   r   r   r   <module>   s4  	


T	
 z