o
    }oiE                     @   s  d dl Z d dlmZ d dlmZ d dlmZmZmZm	Z	 d dl
mZ d dlZd dlmZ d dlmZ d dlmZmZmZmZmZ d dlmZmZ d dlmZ eZejj d	d
					d*dej!dej"dedeee ej#e f deee ej#e f dee dee dee	eeef  defddZ$ejj d	d
			d+dej!dej"dedeee ej#e f deee ej#e f dee defddZ%ejj d	d
				d,dej!dej"dedeee ej#e f deee ej#e f dee dee	eeef  defddZ&ejj d	d
					d*dej!dej"dedeee ej#e f deee ej#e f dee dee dee	eeef  defddZ'dd Z(ejj dd	d d!d Z)dej!dej"deddfd"d#Z*dej!dej"dedee dee dee dee dee	eeef  defd$d%Z+d&d' Z,					d*dej!dej"dedee dee dee dee dee	eeef  ddfd(d)Z-dS )-    N)deepcopy)Path)AnyCallableOptionalUnion)	Annotated)
AutoResume
NeMoLoggerOptimizerModuleTrainer-configure_no_restart_validation_training_loop)PEFTModelTransform)loggingspeechlm)	namespacemodeldatatrainerlogresumeoptim	tokenizermodel_transformreturnc           	   
   C   *   t | |||||||d}|| | |jS )aY  
    Trains a model using the specified data and trainer, with optional tokenizer, source, and export.

    Args:
        model (pl.LightningModule): The model to be trained.
        data (pl.LightningDataModule): The data module containing training data.
        trainer (Trainer): The trainer instance configured with a MegatronStrategy.
        log (NeMoLogger): A nemologger instance.
        resume (Optional[Union[AutoResume, Resume]]): Resume training from a checkpoint.
        optim (Optional[OptimizerModule]): The optimizer module to be used. If not provided, the default optimizer
            from the model will be used.
        tokenizer (Optional[TokenizerType]): Tokenizer setting to be applied. Can be 'data' or 'model'
            or an instance of TokenizerSpec.
        export (Optional[str]): Filename to save the exported checkpoint after training.
        model_transform (Optional[Union[Callable[[nn.Module], nn.Module], PEFT]]): A model transform to be applied.

    Returns
    -------
        Path: The directory path where training artifacts are saved.

    Examples
    --------
        >>> from nemo.collections import llm
        >>> from nemo import lightning as nl
        >>> model = llm.MistralModel()
        >>> data = llm.SquadDataModule(seq_length=4096, global_batch_size=16, micro_batch_size=2)
        >>> precision = nl.MegatronMixedPrecision(precision="bf16-mixed")
        >>> trainer = nl.Trainer(strategy=nl.MegatronStrategy(tensor_model_parallel_size=2), plugins=precision)
        >>> llm.train(model, data, trainer, tokenizer="data")
        PosixPath('/path/to/log_dir')
    r   r   r   r   r   r   r   r   )_setupfitexp_dir	r   r   r   r   r   r   r   r   	app_state r#   Q/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/speechlm/api.pytrain&   s   +r%   c              	   C   s*   t | |||||d t| |||||ddS )a  
    Pretrains a model using the specified data and trainer, with optional logging, resuming, and optimization.

    This function is a wrapper around the `train` function, specifically configured for pretraining tasks.
    Note, by default it will use the tokenizer from the model.

    Args:
        model (pl.LightningModule): The model to be pretrained.
        data (pl.LightningDataModule): The data module containing pretraining data.
        trainer (Trainer): The trainer instance configured with a MegatronStrategy.
        log (NeMoLogger): A nemologger instance.
        resume (Optional[AutoResume]): Resume training from a checkpoint.
        optim (Optional[OptimizerModule]): The optimizer module to be used. If not provided, the default
            optimizer from the model will be used.

    Returns:
        Path: The directory path where pretraining artifacts are saved.

    Examples:
        >>> from nemo.collections import llm
        >>> from nemo import lightning as nl
        >>> model = llm.MistralModel()
        >>> data = llm.PretrainingDataModule(paths=[...], seq_length=4096, global_batch_size=16, micro_batch_size=2)
        >>> precision = nl.MegatronMixedPrecision(precision="bf16-mixed")
        >>> trainer = nl.Trainer(strategy=nl.MegatronStrategy(tensor_model_parallel_size=2), plugins=precision)
        >>> llm.pretrain(model, data, trainer)
        PosixPath('/path/to/log_dir')
    )r   r   r   r   )r   r   r   r   r   r   r   _validate_configr%   )r   r   r   r   r   r   r#   r#   r$   pretraina   s   %r(   peftc              
   C   s.   t | ||||||d t| |||||d|dS )a  
    Finetunes a model using the specified data and trainer, with optional logging, resuming, and PEFT.

    Note, by default it will use the tokenizer from the model.

    Args:
        model (pl.LightningModule): The model to be finetuned.
        data (pl.LightningDataModule): The data module containing finetuning data.
        trainer (Trainer): The trainer instance configured with a MegatronStrategy.
        log (NeMoLogger): A nemologger instance.
        resume (Optional[AutoResume]): Resume training from a checkpoint.
        optim (Optional[OptimizerModule]): The optimizer module to be used. If not provided, the default
            optimizer from the model will be used.
        peft (Optional[PEFT]): A PEFT (Parameter-Efficient Fine-Tuning) configuration to be applied.

    Returns:
        Path: The directory path where finetuning artifacts are saved.

    Examples:
        >>> from nemo.collections import llm
        >>> from nemo import lightning as nl
        >>> model = llm.MistralModel()
        >>> data = llm.SquadDataModule(seq_length=4096, global_batch_size=16, micro_batch_size=2)
        >>> precision = nl.MegatronMixedPrecision(precision="bf16-mixed")
        >>> trainer = nl.Trainer(strategy=nl.MegatronStrategy(tensor_model_parallel_size=2), plugins=precision)
        >>> llm.finetune(model, data, trainer, peft=llm.peft.LoRA()])
        PosixPath('/path/to/log_dir')
    )r   r   r   r   r   r   r&   )r   r   r   r   r   r   r)   r#   r#   r$   finetune   s   'r*   c           	   
   C   r   )a  
    Validates a model using the specified data and trainer, with optional logging, resuming, and model transformations.

    Args:
        model (pl.LightningModule): The model to be validated.
        data (pl.LightningDataModule): The data module containing validation data.
        trainer (Trainer): The trainer instance configured with a MegatronStrategy.
        log (NeMoLogger): A nemologger instance.
        resume (Optional[AutoResume]): Resume from a checkpoint for validation.
        optim (Optional[OptimizerModule]): The optimizer module to be used. If not provided, the default optimizer
            from the model will be used.
        tokenizer (Optional[TokenizerType]): Tokenizer setting to be applied. Can be 'data' or 'model'
            or an instance of TokenizerSpec.
        model_transform (Optional[Union[Callable[[nn.Module], nn.Module], PEFT]]): A model transform to be applied.

    Returns:
        Path: The directory path where validation artifacts are saved.

    Examples:
        >>> from nemo.collections import llm
        >>> from nemo import lightning as nl
        >>> model = llm.MistralModel()
        >>> data = llm.SquadDataModule(seq_length=4096, global_batch_size=16, micro_batch_size=2)
        >>> precision = nl.MegatronMixedPrecision(precision="bf16-mixed")
        >>> trainer = nl.Trainer(strategy=nl.MegatronStrategy(tensor_model_parallel_size=2), plugins=precision)
        >>> llm.validate(model, data, trainer, tokenizer="data")
        PosixPath('/path/to/log_dir')
    r   )r   validater    r!   r#   r#   r$   r+      s   'r+   c                   C      t d)z(
    Evaluates NeMo SpeechLM model.
    'This function will be implemented laterNotImplementedErrorr#   r#   r#   r$   evaluate   s   r0   generate)namer   c                   C   r,   )z3
    Generates text using a NeMo Speech model.
    r-   r.   r#   r#   r#   r$   r1     s   c                 C   s   |dkrt | d|j d S |dkrt |d| j d S z!ddlm} t||r5t | d| t |d| W d S td|  tyF   tdw )Nr   r   r   r   )TokenizerSpecz2Expected TokenizerSpec or 'data' or 'model', got: zTokenizerSpec is not available)_set_with_ior   1nemo.collections.common.tokenizers.tokenizer_specr3   
isinstance
ValueErrorImportError)r   r   r   r3   r#   r#   r$   _use_tokenizer  s   
r9   c           
      C   s   t | |pt }|rt|tr|jrtd d|j_|j|t	|ddt	t
dd d}	|d ur6|||  |r=||  |rEt| || |rMt| d| t	| dd rqtdd |jD sqt|trj|j| |	S |jt  |	S )	Nz8Disabling try_restore_best_ckpt restoration for adaptersFresume_if_exists__io__)r:   task_configr   c                 s   s    | ]}t |tV  qd S )N)r6   r   ).0cbr#   r#   r$   	<genexpr>@  s    z_setup.<locals>.<genexpr>)r   r
   r6   r   ckptr   infotry_restore_best_ckptsetupgetattrr%   connectr9   r4   any	callbacksr   append)
r   r   r   r   r   r   r   r   _logr"   r#   r#   r$   r     s2   






r   c                 C   s@   t | || t| drt|drt | j|t|j d S d S d S )Nr;   )setattrhasattrr;   r   )objattrvaluer#   r#   r$   r4   I  s   r4   c                 C   s  t | drUt| jdddksJ t| jdddksJ | jjdks#J | jjdks+J | jjdks3J | jjdks;J t | jdrTt| jdd d urT| jj| jjksTJ nt	|j
tjr`J dt |drl|jdkslJ t |drx|jdksxJ t |dr|jdksJ t |drt |dr|j|j dksJ d	t	|j
tjr|j
jdksJ |j
jdksJ |j
jdksJ |j|j |j
j|j
j |j
j  dksJ d
|j|j|j|j |j
j|j
j |j
j    dksJ d|j
jdkr|j
jdkrtd d|j
_|j
jdkr|j
jd usJ dn |j
jd ur*td d |j
_|j
jd ur:td d |j
_|j
jdkr_t | dr_| jjd ur_| jj|j
jd  dks_J d|j
jdkrt | dr| jjd uswJ d| jj|j
j dksJ dd S d S d S d S )Nconfig
seq_length   r   max_position_embeddingszExpected model.config to existmicro_batch_sizeglobal_batch_sizezGGlobal batch size must be divisible by micro batch size in data module.z[Number of GPUs must be divisible by the product of all parallelism sizes for data parallel.z]Global batch size must be divisible by the product of micro batch size and data parallel sizeTzKDisabling sequence parallelism because tensor model parallelism is disabledFzCpipeline_dtype must be set if pipeline model parallelism is enabledzUDisabling virtual pipeline parallelism because pipeline model parallelism is disabledzMSetting pipeline dtype to None because pipeline model parallelism is disabled   z[Sequence length must be divisible by 2 * context parallel size if context parallel is used.z<num_experts must be non None to use expert model parallelismzENumber of experts should be a multiple of expert model parallel_size.)rK   rD   rO   
num_layershidden_sizenum_attention_headsffn_hidden_sizerP   rR   r6   strategynlMegatronStrategyrS   rT   tensor_model_parallel_sizepipeline_model_parallel_sizecontext_parallel_sizenum_devices	num_nodessequence_parallelwarningswarnpipeline_dtype$virtual_pipeline_model_parallel_sizeexpert_model_parallel_sizenum_moe_expertsr   r#   r#   r$   r'   O  s   








9r'   )NNNNN)NNN)NNNN).rc   copyr   pathlibr   typingr   r   r   r   lightning.pytorchpytorchplnemo_runruntyping_extensionsr   nemo.lightning	lightningr[   r	   r
   r   r   r    nemo.lightning.pytorch.callbacksr   r   
nemo.utilsr   TokenizerTypecli
entrypointLightningModuleLightningDataModuleConfigr%   r(   r*   r+   r0   r1   r9   r   r4   r'   r#   r#   r#   r$   <module>   s.  
:03	6
	
+
	