from typing import Callable, Optional

import lightning.pytorch as pl
import nemo_run as run

from nemo.collections.llm.api import finetune, pretrain
from nemo.collections.llm.gpt.data.mock import MockDataModule
from nemo.collections.llm.gpt.data.packed_sequence import PackedSequenceSpecs
from nemo.collections.llm.gpt.model.deepseek import DeepSeekModel, DeepSeekV2LiteConfig
from nemo.collections.llm.peft import PEFT_STR2CLS
from nemo.collections.llm.recipes.deepseek import trainer
from nemo.collections.llm.recipes.finetune_default import default_finetune_recipe
from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger
from nemo.collections.llm.recipes.optim.adam import distributed_fused_adam_with_cosine_annealing
from nemo.lightning.pytorch.callbacks.deepep import DeepEPCallback
from nemo.utils.exp_manager import TimingCallback

NAME = "deepseek_v2_lite"


@run.cli.factory(name=NAME)
def model() -> run.Config[pl.LightningModule]:
    """
    Factory function to create a DeepSeek-V2-Lite (16B) model configuration.

    Returns:
        run.Config[pl.LightningModule]: Configuration for the DeepSeek V2 Lite model.

    Examples:
        CLI usage:
            $ nemo llm pretrain model=deepseek_v2_lite ...

        Python API usage:
            >>> model_config = model()
            >>> print(model_config)
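            >>> # Illustrative: fields on the returned run.Config can be overridden
            >>> # before use, e.g. shrinking the context window:
            >>> model_config.config.seq_length = 2048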
    """
    conf = run.Config(DeepSeekV2LiteConfig)
    return run.Config(DeepSeekModel, config=conf)


@run.cli.factory(target=pretrain, name=NAME)
def pretrain_recipe(
    dir: Optional[str] = None,
    name: str = "default",
    num_nodes: int = 1,
    num_gpus_per_node: int = 8,
    fn: Callable = pretrain,
) -> run.Partial:
    """
    Create a pre-training recipe for DeepSeek-V2-Lite (16B) model.

    This function sets up a complete configuration for pre-training, including
    model, trainer, data, logging, optimization, and resumption settings.

    Args:
        dir (Optional[str]): Directory for saving logs and checkpoints.
        name (str): Name of the pre-training run.
        num_nodes (int): Number of compute nodes to use.
        num_gpus_per_node (int): Number of GPUs per node.
        fn (Callable): The pre-training function to use.

    Returns:
        run.Partial: Partial configuration for pre-training.

    Examples:
        CLI usage:
            $ nemo llm pretrain --factory deepseek_v2_lite
            $ nemo llm pretrain --factory "deepseek_v2_lite(num_nodes=4, name='my_deepseek_v2_lite')"

        Python API usage:
            >>> recipe = pretrain_recipe(name="deepseek_v2_lite_pretrain", num_nodes=4)
            >>> print(recipe)
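            >>> # Illustrative: recipe fields can be overridden before launching
            >>> recipe.data.global_batch_size = 256
            >>> recipe.trainer.max_steps = 10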

    """
    recipe = run.Partial(
        fn,
        model=model(),
        trainer=trainer(
            tensor_parallelism=1,
            expert_parallelism=8,
            num_nodes=num_nodes,
            num_gpus_per_node=num_gpus_per_node,
            callbacks=[run.Config(TimingCallback)],
        ),
        data=run.Config(MockDataModule, seq_length=4096, global_batch_size=512, micro_batch_size=1),
        log=default_log(dir=dir, name=name, tensorboard_logger=tensorboard_logger(name=name)),
        optim=distributed_fused_adam_with_cosine_annealing(max_lr=3e-4),
        resume=default_resume(),
    )
    # Enable DeepEP's optimized communication kernels for MoE expert parallelism.
    deepep_callback = run.Config(DeepEPCallback)
    recipe.trainer.callbacks.append(deepep_callback)
    return recipe


@run.cli.factory(target=finetune, name=NAME)
def finetune_recipe(
    dir: Optional[str] = None,
    name: str = "default",
    num_nodes: int = 1,
    num_gpus_per_node: int = 8,
    peft_scheme: Optional[str] = "lora",
    seq_length: Optional[int] = None,
    packed_sequence: Optional[bool] = None,
) -> run.Partial:
    """
    Create a fine-tuning recipe for DeepSeek-V2-Lite (16B) model.

    This function sets up a complete configuration for fine-tuning, including
    model, trainer, data, logging, optimization, and resumption settings.
    The recipe uses LoRA (Low-Rank Adaptation) for efficient fine-tuning, unless peft_scheme is set to None.

    Args:
        dir (Optional[str]): Directory for saving logs and checkpoints.
        name (str): Name of the fine-tuning run.
        num_nodes (int): Number of compute nodes to use.
        num_gpus_per_node (int): Number of GPUs per node.
        peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning.
            Allowed values: 'lora'/'dora'/'none'/None.
        seq_length (Optional[int]): Maximum number of tokens per microbatch. Defaults to 2048.
        packed_sequence (Optional[bool]): If true, fine-tuning sequences will be packed into batches up to the given
            maximum seq_length for better efficiency. Defaults to False.
    Returns:
        run.Partial: Partial configuration for fine-tuning.

    Examples:
        CLI usage:
            $ nemo llm finetune --factory deepseek_v2_lite
            $ nemo llm finetune --factory "deepseek_v2_lite(num_nodes=3, name='my_deepseek_v2_lite_finetune')"

        Python API usage:
            >>> recipe = finetune_recipe(name="deepseek_v2_lite_finetune", num_nodes=2)
            >>> print(recipe)
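            >>> # Illustrative: full-parameter fine-tuning with sequence packing
            >>> recipe = finetune_recipe(peft_scheme='none', packed_sequence=True)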

    Note:
        This recipe uses the SQuAD dataset for fine-tuning. Be aware that fine-tuning the DeepSeek-V2 model
        requires substantial computational resources.
    """
    if seq_length is None:
        seq_length = 2048

    if packed_sequence is None:
        packed_sequence = False

    recipe = default_finetune_recipe(
        model(), "deepseek-ai/DeepSeek-V2-Lite", dir, name, num_nodes, num_gpus_per_node
    )
    if peft_scheme is None or peft_scheme.lower() == "none":
        recipe.trainer.strategy.tensor_model_parallel_size = 2
        recipe.trainer.strategy.sequence_parallel = True
        recipe.optim.config.lr = 5e-6
    elif peft_scheme.lower() in ["lora", "dora"]:
        recipe.peft = run.Config(PEFT_STR2CLS[peft_scheme.lower()])
        recipe.peft.target_modules = ["linear_q_proj", "linear_kv_down_proj", "linear_kv_up_proj", "linear_proj"]
        # The distributed optimizer and cross-entropy loss fusion are disabled for PEFT runs.
        recipe.optim.config.use_distributed_optimizer = False
        recipe.model.config.cross_entropy_loss_fusion = False
        recipe.optim.config.lr = 1e-4
    else:
        raise ValueError(f"Unrecognized peft scheme: {peft_scheme}")

    recipe.model.config.seq_length = seq_length
    recipe.data.seq_length = seq_length
    if packed_sequence:
        recipe.data.dataset_kwargs = {"pad_to_max_length": True}
        recipe.data.packed_sequence_specs = run.Config(PackedSequenceSpecs, packed_sequence_size=seq_length)

    return recipe
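

# Example usage (illustrative, kept as a comment so nothing runs on import):
# build a LoRA recipe, adjust a field, and launch it with nemo_run. The
# LocalExecutor below is an assumption; substitute whichever nemo_run executor
# matches your environment.
#
#     import nemo_run as run
#     recipe = finetune_recipe(name="ds_v2_lite_lora", packed_sequence=True)
#     recipe.optim.config.lr = 2e-4
#     run.run(recipe, executor=run.LocalExecutor())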
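

if __name__ == "__main__":
    # Illustrative smoke test, not part of the packaged recipe: build both
    # recipes and print their configurations without launching any training.
    pretrain_cfg = pretrain_recipe(name="deepseek_v2_lite_pretrain")
    print(pretrain_cfg)
    finetune_cfg = finetune_recipe(name="deepseek_v2_lite_finetune")
    print(finetune_cfg)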