o
    }oi                  *   @   sb  d dl mZ d dlmZ d dlZd dlZd dlm	Z	 d dl
mZ d dlmZmZ d dlmZmZmZ d dlmZ d dlmZ d	Zejjed
d5dedejej fddZejje	eddddddejdddddddddddddddddd dd e	fd!ee d"eded#ed$ed%eej  d&ee d'ed(e!d)ed*ed+ed,ed-ed.e"d/ed0ed1ed2edej#f(d3d4Z$dS )6    )OptionalN)pretrain)BERTMockDataModule)
bert_modelbert_trainer)default_logdefault_resumetensorboard_logger),distributed_fused_adam_with_cosine_annealing)TimingCallback	bert_110mnamehuggingface	bert_typereturnc                 C   s   t t| dS )a  
    Factory function to create a Bert-Base (110 million) model configuration.

    Returns:
        run.Config[pl.LightningModule]: Configuration for the BERT-Base (110 million) model.

    Examples:
        CLI usage:
            $ nemo llm pretrain model=bert_110m ...

        Python API usage:
            >>> model_config = model(bert_type="megatron")
            >>> print(model_config)
    )versionr   )r   NAMEr    r   Z/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/llm/recipes/bert_110m.pymodel   s   r   )targetr   defaultmegatron   F   i{ z
bf16-mixedg      ?    
   i     i   i  gh㈵>g-C6?dirr   tensor_parallelismpipeline_parallelismpipeline_parallelism_typevirtual_pipeline_parallelismcontext_parallelismsequence_parallelism	num_nodesnum_gpus_per_node	max_steps	precisionaccumulate_grad_batchesgradient_clip_vallimit_test_batcheslimit_val_batcheslog_every_n_stepsval_check_intervalc                 C   s   t j|t|dtdi d|d|d|d|d|d|d|	d	|
d
|d|d|d|d|d|d|dt tgt jt|||dt| |t|ddt	||||||dt
 dS )a0	  
    Create a pre-training recipe for BERT-base (110M) model.

    This function sets up a complete configuration for pre-training, including
    model, trainer, data, logging, optimization, and resumption settings.

    Args:
        dir (Optional[str]): Directory for saving logs and checkpoints.
        name (str): Name of the pre-training run.
        bert_type (str): Either "megatron" or "huggingface" type of BERT.
        tensor_parallelism (int): Degree of tensor model parallelism.
        pipeline_parallelism (int): Degree of pipeline model parallelism.
        pipeline_parallelism_type (Optional[torch.dtype]): Data type for pipeline parallelism.
        virtual_pipeline_parallelism (Optional[int]): Size of virtual pipeline parallelism.
        context_parallelism (int): Degree of context parallelism.
        sequence_parallelism (bool): Whether to use sequence parallelism.
        num_nodes (int): Number of compute nodes to use.
        num_gpus_per_node (int): Number of GPUs per node.
        max_steps (int): Maximum number of training steps.
        precision (str): Precision configuration, one of fp32, 16-mixed or bf16-mixed.
        accumulate_grad_batches (int): Number of steps per gradient accumulation.
        gradient_clip_val (float): Value for gradient clipping.
        limit_test_batches (int): Limit the number of test batches.
        limit_val_batches (int): Limit the number of validation batches.
        log_every_n_steps (int): Log every n steps.
        val_check_interval (int): Run validation every N steps.
        global_batch_size (int): Global batch size.
        micro_batch_size (int): Micro batch size.
        seq_length (int): Sequence length.
        warmup_steps (int): Number of warmup steps.
        constant_steps (int): Number of constant steps.
        min_lr (float): Minimum learning rate.
        max_lr (float): Maximum learning rate.
        fn (Callable): The pre-training function to use.

    Returns:
        run.Partial: Partial configuration for pre-training.

    Examples:
        CLI usage:
            $ nemo llm pretrain --factory bert_110m
            $ nemo llm pretrain --factory "bert_110m(num_nodes=1, name='my_bert_pretrain')"

        Python API usage:
            >>> recipe = pretrain_recipe(name="bert_pretrain", num_nodes=1)
            >>> print(recipe)
    r   r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r-   r.   r/   r0   	callbacks)
seq_lengthglobal_batch_sizemicro_batch_sizer   )r    r   r	   )r*   warmup_stepsconstant_stepsmin_lrmax_lr	clip_grad)r   trainerdatalogoptimresumeNr   )runPartialr   r   Configr   r   r   r	   r
   r   )r    r   r   r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r3   r4   r2   r5   r6   r7   r8   fnr   r   r   pretrain_recipe2   sj   R	
rC   )r   )%typingr   lightning.pytorchpytorchplnemo_runr?   torchnemo.collections.llm.apir   #nemo.collections.llm.bert.data.mockr   !nemo.collections.llm.recipes.bertr   r   (nemo.collections.llm.recipes.log.defaultr   r   r	   'nemo.collections.llm.recipes.optim.adamr
   nemo.utils.exp_managerr   r   clifactorystrrA   LightningModuler   bfloat16intdtypeboolfloatr@   rC   r   r   r   r   <module>   s   	
!