o
    wi                  $   @   s6  d dl mZ d dlmZ d dlZd dlZd dlm	Z	 d dl
mZ d dlmZmZmZmZmZmZmZmZmZmZmZmZ d dlmZmZ dedejej fd	d
Z 																d&de!de!deej" dee! de!de#de!de!de!dede!de!d e!d!e!d"e!d#ee$eje	   dejej% f"d$d%Z&dS )'    )OptionalN)Callback)	lightning)Qwen2Config1P5BQwen2Config7BQwen2Config72BQwen2Config500M
Qwen2ModelQwen25Config1P5BQwen25Config3BQwen25Config7BQwen25Config14BQwen25Config32BQwen25Config72BQwen25Config500M)
bf16_mixed
fp16_mixedversionreturnc                 C   s  d}| dkrt t}nc| dkrt t}nY| dkr t t}nO| dkr*t t}nE| dkr4t t}n;| dkr>t t}n1| dkrHt t}n'| d	krRt t	}n| d
kr\t t
}n| dkrft t}n	| dkrot t}|duszJ d|  t jt|dS )a  
    A function to create a qwen2 models.

    Args:
        version (str): The version of the qwen2 model to create. one of ["qwen2_500m", "qwen2_1p5b",
            "qwen2_7b", "qwen2_72b"].

    Returns:
        run.Config[pl.LightningModule]: Configuration for the qwen2 model.
    N
qwen2_500mqwen25_500m
qwen2_1p5bqwen25_1p5b	qwen25_3bqwen2_7b	qwen25_7b
qwen25_14b
qwen25_32b	qwen2_72b
qwen25_72bzInvalid version: )config)runConfigr   r   r   r
   r   r   r   r   r   r   r   r	   )r   r     r#   _/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/nemo/collections/llm/recipes/qwen2.pyqwen2_model(   s2   
r%         F   { 
bf16-mixed    
     tensor_parallelismpipeline_parallelismpipeline_parallelism_typevirtual_pipeline_parallelismcontext_parallelismsequence_parallelism	num_nodesnum_gpus_per_node	max_steps	precisionaccumulate_grad_batcheslimit_test_batcheslimit_val_batcheslog_every_n_stepsval_check_interval	callbacksc                 C   sp   t jtj| |||||ddddd}d}|	dkrt }n|	dkr"t }t jtjd|||
|||||||d|d}|S )	a  
    Configure the NeMo Lightning Trainer for qwen2 models.

    This function sets up the distributed training strategy and other training parameters.

    Args:
        tensor_parallelism (int): Degree of tensor model parallelism.
        pipeline_parallelism (int): Degree of pipeline model parallelism.
        pipeline_parallelism_type (Optional[torch.dtype]): Data type for pipeline parallelism.
        virtual_pipeline_parallelism (Optional[int]): Size of virtual pipeline parallelism.
        context_parallelism (int): Degree of context parallelism.
        sequence_parallelism (bool): Whether to use sequence parallelism.
        num_nodes (int): Number of compute nodes to use.
        num_gpus_per_node (int): Number of GPUs per node.
        max_steps (int): Maximum number of training steps.
        precision (str): Precision configuration, one of fp32, 16-mixed or bf16-mixed.
        accumulate_grad_batches (int): Number of steps per gradient accumulation.
        limit_test_batches (int): Limit the number of test batches.
        limit_val_batches (int): Limit the number of validation batches.
        log_every_n_steps (int): Log every n steps.
        val_check_interval (int): Run validation every N steps.
        callbacks (Optional[list[run.Config[Callback]]]): List of callback configurations.

    Returns:
        run.Config[nl.Trainer]: Configuration for the NeMo Lightning Trainer.
    T)
tensor_model_parallel_sizepipeline_model_parallel_sizepipeline_dtype$virtual_pipeline_model_parallel_sizecontext_parallel_sizesequence_parallelgradient_as_bucket_viewckpt_include_optimizerckpt_async_saveckpt_parallel_loadNz16-mixedr*   gpuF)acceleratorr=   devicesr8   r9   r:   r;   r6   r4   pluginsstrategyuse_distributed_samplerr<   )r!   r"   nlMegatronStrategyr   r   Trainer)r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   rL   precision_plugintrainerr#   r#   r$   qwen2_trainerO   sF   ,rS   )r&   r'   NNr'   Fr'   r(   r)   r*   r'   r+   r+   r,   r-   N)'typingr   lightning.pytorchpytorchplnemo_runr!   torch$lightning.pytorch.callbacks.callbackr   nemor   rN   $nemo.collections.llm.gpt.model.qwen2r   r   r   r   r	   r
   r   r   r   r   r   r   6nemo.collections.llm.recipes.precision.mixed_precisionr   r   strr"   LightningModuler%   intdtypeboollistrP   rS   r#   r#   r#   r$   <module>   sx   8(	

