o
    }oi5&                     @   s  d dl mZmZ d dlmZmZmZ d dlmZ	 d dl
Zd dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZmZ d dl m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z) d dl*m+Z+ dZ,eG dd dej-Z.ej/j0e,d	d8dee1 dej2e	j3 fddZ4										d9d e1d!e1d"eej5 d#ee1 d$e1d%e6d&e1d'e1d(e1d)ee7ej2e   dej2ej8 fd*d+Z9ej/j0ee,d,dd-dddddefd.ee: d/e:d&e1d'e1de1d0e6d1e6d2edej;fd3d4Z<d5ej;dej;fd6d7Z=dS ):    )	dataclassfield)CallableOptionalUnionN)Callback)DistributedDataParallelConfig)TransformerConfig)	lightning)vlm)pretrainLlama3Config8B)default_logdefault_resumetensorboard_logger),distributed_fused_adam_with_cosine_annealing)
bf16_mixed)MockDataModule)GarbageCollectionCallback)MegatronCommOverlapCallback)TimingCallbackneva_llama3_8bc                   @   s   e Zd ZU dZddlmZ edd dZee	d< edd dZ
eeef e	d	< ed
d dZee	d< dZee	d< dZee	d< dZee	d< dS )NevaConfig8Bz#NeVA (CLIP-ViT-L + LLaMa38B) Configr   )PretrainedConfigc                   C   s
   t ddS )N    
seq_lengthr    r   r   _/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/vlm/recipes/neva_llama3_8b.py<lambda>.   s   
 zNevaConfig8B.<lambda>)default_factorylanguage_transformer_configc                   C   s   t jddS )Nz!openai/clip-vit-large-patch14-336)pretrained_model_name_or_path)r   HFCLIPVisionConfigr   r   r   r   r    0   s    vision_transformer_configc                   C   s   t jddddS )Ni   i   )
input_sizehidden_sizeffn_hidden_size)r   MultimodalProjectorConfigr   r   r   r   r    5   s    vision_projection_configFfreeze_language_modelTfreeze_vision_modelfreeze_vision_projectionN)__name__
__module____qualname____doc__transformersr   r   r"   r	   __annotations__r%   r   r*   r+   boolr,   r-   r   r   r   r   r   (   s   
 r   namer   r   returnc                 C   s   t jtjt td}|S )z
    Factory function to create a NeVA (CLIP-ViT-L + LLaMa3 8B) model configuration.

    Returns:
        run.Config[pl.LightningModule]: Configuration for the NeVA (CLIP-ViT-L + LLaMa3 8B) model.
    )config)runConfigr   	NevaModelr   )r   model_configr   r   r   model=   s   
r=         F   { tensor_parallelismpipeline_parallelismpipeline_parallelism_typevirtual_pipeline_parallelismcontext_parallelismsequence_parallelism	num_nodesnum_gpus_per_node	max_steps	callbacksc
                 C   sb   t jtj| |||||dddt jtddddddd}
t jtjdd|	|ddd	||t |
dd
d}|S )a}  
    Configure the NeMo Lightning Trainer for NeVA (CLIP-ViT-L + LLaMa3 8B) model.

    This function sets up the distributed training strategy and other training parameters.

    Args:
        tensor_parallelism (int): Degree of tensor model parallelism.
        pipeline_parallelism (int): Degree of pipeline model parallelism.
        pipeline_parallelism_type (Optional[torch.dtype]): Data type for pipeline parallelism.
        virtual_pipeline_parallelism (Optional[int]): Size of virtual pipeline parallelism.
        context_parallelism (int): Degree of context parallelism.
        sequence_parallelism (bool): Whether to use sequence parallelism.
        num_nodes (int): Number of compute nodes to use.
        num_gpus_per_node (int): Number of GPUs per node.
        max_steps (int): Maximum number of training steps.
        callbacks (Optional[list[run.Config[Callback]]]): List of callback configurations.

    Returns:
        run.Config[nl.Trainer]: Configuration for the NeMo Lightning Trainer.

    Examples:
        CLI usage:
            $ nemo llm finetune trainer=neva_llama3_8b ...

        Python API usage:
            >>> trainer_config = trainer(num_nodes=2, num_gpus_per_node=8)
            >>> print(trainer_config)

    Note:
        For more information on distributed training strategies, refer to the
        NeMo documentation on multi-GPU and multi-node training.
    TF)check_for_nan_in_gradgrad_reduce_in_fp32overlap_grad_reduceoverlap_param_gatheraverage_in_collective)
tensor_model_parallel_sizepipeline_model_parallel_sizepipeline_dtype$virtual_pipeline_model_parallel_sizecontext_parallel_sizesequence_parallelgradient_as_bucket_viewckpt_async_saveckpt_parallel_loadddpgpur>   2       
   i  )acceleratoraccumulate_grad_batchesrK   deviceslimit_test_batcheslimit_val_batcheslog_every_n_stepsrJ   rH   pluginsstrategyuse_distributed_samplerval_check_interval)r9   r:   nlMegatronStrategyr   Trainerr   )rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rf   trainerr   r   r   rl   K   sJ   ,rl   )targetr6   defaultdirr6   performance_modepacked_sequencefnc           	      C   sj   t j|t|dt||t tgdt jt|ddd|dt| |t|ddt	d	d
t
 d}|r3t|}|S )a  
    Create a finetuning recipe for NeVA (CLIP-ViT-L + LLaMa3 8B) model.
    Using pre-train fn targets to use appropriate llm api call.

    This function sets up a complete configuration for finetuning, including
    model, trainer, data, logging, optimization, and resumption settings.

    Args:
        dir (Optional[str]): Directory for saving logs and checkpoints.
        name (str): Name of the finetuning run.
        num_nodes (int): Number of compute nodes to use.
        num_gpus_per_node (int): Number of GPUs per node.
        performance_mode (bool): If true, enables optimizations for maximum performance.
        fn (Callable): The finetuning function to use.

    Returns:
        run.Partial: Partial configuration for fine-tuning.

    Examples:
        CLI usage:
            $ nemo llm finetune --factory neva_llama3_8b

        Python API usage:
            >>> recipe = finetune_recipe(name="neva_llama3_8b_finetune", num_nodes=2)
            >>> print(recipe)

    r   )rH   rI   rK   i   r>      )r   global_batch_sizemicro_batch_sizenum_workersrq   r5   )ro   r6   r   ga2U0*3?)max_lr)r=   rl   datalogoptimresume)r9   Partialr=   rl   r:   r   r   r   r   r   r   "finetune_performance_optimizations)	ro   r6   rH   rI   r   rp   rq   rr   reciper   r   r   finetune_recipe   s.   &
r   r~   c                 C   s   t | jdr| jjdu rg | j_| jjjdkr$| jjtjtdd n| jjtjtdd | jjtt	 | jjtt
dd d| jj_| S )a  
    Create a performance-optimized finetuning recipe for Llama3 8B model.

    This method enables performance optimizations that may not be suitable for all use cases.
    It builds upon the standard finetuning recipe and adds additional performance enhancements.

    Args:
        recipe (run.Partial): Base finetune recipe to which performance optimizations will be added

    Returns:
        run.Partial: Partial configuration for performance-optimized finetuning.

    Note:
        Use this method with caution and only when you need maximum performance.
        It may not be suitable for all hardware configurations or use cases.
    rK   Nr>   T)tp_comm_overlapFd   )hasattrrl   rK   rf   rQ   appendr9   r:   r   r   r   re   rM   )r~   r   r   r   r}      s2   
r}   )r   )
r>   r>   NNr?   Fr>   r@   rA   N)>dataclassesr   r   typingr   r   r   lightning.pytorchpytorchplnemo_runr9   torch$lightning.pytorch.callbacks.callbackr   megatron.core.distributedr   ,megatron.core.transformer.transformer_configr	   nemor
   ri   nemo.collectionsr   nemo.collections.llm.apir   $nemo.collections.llm.gpt.model.llamar   (nemo.collections.llm.recipes.log.defaultr   r   r   'nemo.collections.llm.recipes.optim.adamr   6nemo.collections.llm.recipes.precision.mixed_precisionr   #nemo.collections.vlm.neva.data.mockr   3nemo.lightning.pytorch.callbacks.garbage_collectionr   6nemo.lightning.pytorch.callbacks.megatron_comm_overlapr   nemo.utils.exp_managerr   NAME
NevaConfigr   clifactoryintr:   LightningModuler=   dtyper4   listrk   rl   strr|   r   r}   r   r   r   r   <module>   s   
	


U	@