o
    }oi!                     @   s  d dl mZ d dlmZ d dlZd dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZ dZejjeddejej  fddZ!										d/de"de"deej# dee" de"de$de"de"de"dee%eje	   dejej& fdd Z'ejjeed!		"	#				$			d0d%ee( d&e(d'e(de"de"d(e"d)e"d*ee( d+ee" d,ee$ dej)fd-d.Z*dS )1    )OptionalN)Callback)DistributedDataParallelConfig)	lightning)llm)finetune)Llama32Reranker1BConfigReRankerModel)PEFT_STR2CLS)default_finetune_recipe)
bf16_mixedllama_reranker_1b)namereturnc                   C   s   t jtt tdS )av  
    Factory function to create a Llama Reranker 1B model configuration.

    Returns:
        run.Config[pl.LightningModule]: Configuration for the Llama Reranker 1B model.

    Examples:
        CLI usage:
            $ nemo llm pretrain model=llama_reranker_1b ...

        Python API usage:
            >>> model_config = model()
            >>> print(model_config)
    )config)runConfigr	   r    r   r   b/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/llm/recipes/llama_reranker_1b.pymodel#   s   r      F   { tensor_parallelismpipeline_parallelismpipeline_parallelism_typevirtual_pipeline_parallelismcontext_parallelismsequence_parallelism	num_nodesnum_gpus_per_node	max_steps	callbacksc
                 C   sb   t jtj| |||||dddt jtddddddd}
t jtjdd|	|ddd||t |
d	d
d}|S )av  
    Configure the NeMo Lightning Trainer for NVEmbed Llama3.2 1B model.

    This function sets up the distributed training strategy and other training parameters.

    Args:
        tensor_parallelism (int): Degree of tensor model parallelism.
        pipeline_parallelism (int): Degree of pipeline model parallelism.
        pipeline_parallelism_type (Optional[torch.dtype]): Data type for pipeline parallelism.
        virtual_pipeline_parallelism (Optional[int]): Size of virtual pipeline parallelism.
        context_parallelism (int): Degree of context parallelism.
        sequence_parallelism (bool): Whether to use sequence parallelism.
        num_nodes (int): Number of compute nodes to use.
        num_gpus_per_node (int): Number of GPUs per node.
        max_steps (int): Maximum number of training steps.
        callbacks (Optional[list[run.Config[Callback]]]): List of callback configurations.

    Returns:
        run.Config[nl.Trainer]: Configuration for the NeMo Lightning Trainer.

    Examples:
        CLI usage:
            $ nemo llm pretrain trainer=llama_reranker_1b ...

        Python API usage:
            >>> trainer_config = trainer(num_nodes=2, num_gpus_per_node=8)
            >>> print(trainer_config)

    Note:
        For more information on distributed training strategies, refer to the
        NeMo documentation on multi-GPU and multi-node training.
    T)check_for_nan_in_gradgrad_reduce_in_fp32overlap_grad_reduceoverlap_param_gatheraverage_in_collective)
tensor_model_parallel_sizepipeline_model_parallel_sizepipeline_dtype$virtual_pipeline_model_parallel_sizecontext_parallel_sizesequence_parallelgradient_as_bucket_viewckpt_async_saveckpt_parallel_loadddpgpur   2       
   Fi  )acceleratoraccumulate_grad_batchesr"   deviceslimit_test_batcheslimit_val_batcheslog_every_n_stepsr!   r   pluginsstrategyuse_distributed_samplerval_check_interval)r   r   nlMegatronStrategyr   Trainerr   )r   r   r   r   r   r   r   r    r!   r"   r=   trainerr   r   r   rC   6   sJ   ,rC   )targetr   defaultmeta-llama/Llama-3.2-1B@   dirr   resume_pathmicro_batch_sizeglobal_batch_sizepeft_scheme
seq_lengthpacked_sequencec
              
   C   s  |du rd}|	du sJ dt t || ||||	}
|du s#| dkr.d|
jj_d|
jj_n/| dv rVt	
t|  |
_d|
j_d	|
j_d
|
jj_d
|
jj_d|
jj_ntd| ||
jj_d|
jj_t	j
tj||||
jjj|
jjj|
jjj|
jjjddd|
_|
S )a  
    Create a fine-tuning recipe for Llama Reranker 1B model.

    This function sets up a complete configuration for fine-tuning, including
    model, trainer, data, logging, optimization, and resumption settings.

    Args:
        dir (Optional[str]): Directory for saving logs and checkpoints.
        name (str): Name of the fine-tuning run.
        resume_path (str): Path to the Huggingface model or pretrained distributed checkpoint for resume
        num_nodes (int): Number of compute nodes to use.
        num_gpus_per_node (int): Number of GPUs per node.
        micro_batch_size (int): Size of micro batch.
        global_batch_size (int): Size of global batch.
        peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning.
            Allowed values: 'lora'/'dora'/'none'/None.
        seq_length (int): Maximum number of tokens per microbatch.
        packed_sequence (Optional[bool]): If true, fine-tuning sequences will be packed into batches up to the given
            maximum seq_length for better efficiency. pack sequence is not supported for embedding model training.

    Returns:
        run.Partial: Partial configuration for fine-tuning.

    Examples:
        CLI usage:
            $ nemo llm finetune --factory nvembed_llama_1b

        Python API usage:
            >>> recipe = finetune_recipe(name="nvembed_llama_1b_finetune", num_nodes=2)
            >>> print(recipe)

    Note:
        This recipe uses the SPECTER dataset for fine-tuning.
    Ni   z>pack_sequence is not supported for Embedding model finetuning.noner   gh㈵>)loradorar      Fg-C6?zUnrecognized peft scheme: )num_hard_negativesnegative_sample_strategyadd_bosadd_eosr   )rM   rJ   rK   dataset_kwargsnum_workers)r   r   lowerrC   r=   r(   optimr   lrr   r   r
   peftdimalphause_distributed_optimizercross_entropy_loss_fusion
ValueErrorrM   rS   r   SpecterReRankerDataModulerT   rU   rV   data)rH   r   rI   r   r    rJ   rK   rL   rM   rN   reciper   r   r   finetune_recipe   s<   /




re   )
r   r   NNr   Fr   r   r   N)
NrE   rF   r   r   r   rG   NNN)+typingr   lightning.pytorchpytorchplnemo_runr   torch$lightning.pytorch.callbacks.callbackr   megatron.core.distributedr   nemor   r@   nemo.collectionsr   nemo.collections.llm.apir   'nemo.collections.llm.gpt.model.rerankerr   r	   nemo.collections.llm.peftr
   -nemo.collections.llm.recipes.finetune_defaultr   6nemo.collections.llm.recipes.precision.mixed_precisionr   NAMEclifactoryr   LightningModuler   intdtypeboollistrB   rC   strPartialre   r   r   r   r   <module>   s   	


U	
