o
    }oi@                     @   s  d dl mZmZ d dlmZ d dlZd dlm	Z	m
Z
 d dlmZ d dlmZ d dlmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZmZ d dlmZ d dlmZ d dl m!Z! dZ"ej#j$e"ddej%ej& fddZ'ej#j$e
e"ddddde
fdee( de(de)de)dedej*fddZ+ej#j$e	e"d						 		d'dee( d!e(de(de)de)d"ee( d#ee) d$ee, dej*fd%d&Z-dS )(    )CallableOptionalN)finetunepretrain)MockDataModule)PackedSequenceSpecs)DeepSeekModelDeepSeekV2Config)PEFT_STR2CLS)trainer)default_finetune_recipe)default_logdefault_resumetensorboard_logger),distributed_fused_adam_with_cosine_annealing)DeepEPCallback)TimingCallbackdeepseek_v2namereturnc                  C   s   t t} t jt| dS )ak  
    Factory function to create a DeepSeek-V2 (236B) model configuration.

    Returns:
        run.Config[pl.LightningModule]: Configuration for the DeepSeek V2 model.

    Examples:
        CLI usage:
            $ nemo llm pretrain model=deepseek_v2 ...

        Python API usage:
            >>> model_config = model()
            >>> print(model_config)
    )config)runConfigr	   r   )conf r   \/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/llm/recipes/deepseek_v2.pymodel$   s   
r   )targetr   default      dirr   	num_nodesnum_gpus_per_nodefnc                 C   s   t j|t tddd||t tgdt jtddddt| |t|dd	t	d
dt
 d}d|jj_d|jj_d|jj_t t}|jj| |S )a  
    Create a pre-training recipe for DeepSeek-V2 (236B) model.

    This function sets up a complete configuration for pre-training, including
    model, trainer, data, logging, optimization, and resumption settings.

    Args:
        dir (Optional[str]): Directory for saving logs and checkpoints.
        name (str): Name of the pre-training run.
        num_nodes (int): Number of compute nodes to use.
        num_gpus_per_node (int): Number of GPUs per node.
        performance_mode (bool): If true, enables optimizations for maximum performance.
        fn (Callable): The pre-training function to use.

    Returns:
        run.Partial: Partial configuration for pre-training.

    Examples:
        CLI usage:
            $ nemo llm pretrain --factory deepseek_v2
            $ nemo llm pretrain --factory "deepseek_v2(num_nodes=16, name='my_deepseek_v2')"

        Python API usage:
            >>> recipe = pretrain_recipe(name="deepseek_v2_pretrain", num_nodes=16)
            >>> print(recipe)

              )tensor_parallelismpipeline_parallelismexpert_parallelismr#   r$   	callbacksi   i   )
seq_lengthglobal_batch_sizemicro_batch_sizer   )r"   r   r   ga2U0*3?)max_lr)r   r   datalogoptimresumefulluniform)r   Partialr   r   r   r   r   r   r   r   r   r   recompute_granularityrecompute_methodrecompute_num_layersr   r,   append)r"   r   r#   r$   r%   recipedeepep_callbackr   r   r   pretrain_recipe8   s,   #




r>   deepseek-ai/DeepSeek-V2   loraresume_pathpeft_schemer-   packed_sequencec           	      C   s8  |du rd}|du r|du s|  dkrd}n|  dv rd}tt || ||||}|du s4|  dkrDd|jj_d|jj_d	|jj_	n<|  dv ryt
t|   |_g d
|j_d|jj_d|jj_d|jj_d|jj_d|jj_d|jj_	ntd| ||jj_||j_|rddi|j_t
jt|d|j_|S )a`  
    Create a fine-tuning recipe for DeepSeek-V2 (236B) model.

    This function sets up a complete configuration for fine-tuning, including
    model, trainer, data, logging, optimization, and resumption settings.
    The recipe uses LoRA (Low-Rank Adaptation) for efficient fine-tuning, unless peft_scheme is set to None.

    Args:
        dir (Optional[str]): Directory for saving logs and checkpoints.
        resume_path (str): Path to the NeMo checkpoint
        name (str): Name of the fine-tuning run.
        num_nodes (int): Number of compute nodes to use.
        num_gpus_per_node (int): Number of GPUs per node.
        peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning.
            Allowed values: 'lora'/'dora'/'none'/None.
        seq_length (int): Maximum number of tokens per microbatch.
        packed_sequence (Optional[bool]): If true, fine-tuning sequences will be packed into batches up to the given
            maximum seq_length for better efficiency. By default, this value equals performance_mode.
    Returns:
        run.Partial: Partial configuration for fine-tuning.

    Examples:
        CLI usage:
            $ nemo llm finetune --factory deepseek_v2
            $ nemo llm finetune --factory "deepseek_v2(num_nodes=2, name='my_deepseek_v2_finetune')"

        Python API usage:
            >>> recipe = finetune_recipe(name="deepseek_v2_finetune", num_nodes=2)
            >>> print(recipe)

    Note:
        This recipe uses the SQuAD dataset for fine-tuning. Be aware that fine-tuning the DeepSeek-V2 model
        requires substantial computational resources.
    Ni   noner    )rA   dorar@   r'   r(   gh㈵>)linear_q_down_projlinear_q_up_projlinear_kv_down_projlinear_kv_up_projlinear_projFr&   g-C6?zUnrecognized peft scheme: pad_to_max_lengthT)packed_sequence_size)lowerr   r   r   strategypipeline_model_parallel_sizeexpert_model_parallel_sizer3   r   lrr   r   r
   pefttarget_modulesuse_distributed_optimizercross_entropy_loss_fusiontensor_model_parallel_size
ValueErrorr-   r1   dataset_kwargsr   packed_sequence_specs)	r"   rB   r   r#   r$   rC   r-   rD   r<   r   r   r   finetune_recipew   s8   .







r[   )Nr?   r   r@   r!   rA   NN).typingr   r   lightning.pytorchpytorchplnemo_runr   nemo.collections.llm.apir   r   "nemo.collections.llm.gpt.data.mockr   -nemo.collections.llm.gpt.data.packed_sequencer   'nemo.collections.llm.gpt.model.deepseekr   r	   nemo.collections.llm.peftr
   %nemo.collections.llm.recipes.deepseekr   -nemo.collections.llm.recipes.finetune_defaultr   (nemo.collections.llm.recipes.log.defaultr   r   r   'nemo.collections.llm.recipes.optim.adamr   'nemo.lightning.pytorch.callbacks.deepepr   nemo.utils.exp_managerr   NAMEclifactoryr   LightningModuler   strintr7   r>   boolr[   r   r   r   r   <module>   s   >	