o
    }oi                     @   s   d dl mZ d dlmZ d dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZ dZejjedd	ejej fd
dZejje
ed									ddee dededededee dededed	ejfddZdS )     )OptionalN)llm)finetune)bert_embedding_model)default_finetune_recipee5_340m)namereturnc                   C   s
   t tdS )av  
    Factory function to create a E5-Large (340 million) model configuration.

    Returns:
        run.Config[pl.LightningModule]: Configuration for the E5-Large (340 million) model.

    Examples:
        CLI usage:
            $ nemo llm pretrain model=e5_340m ...

        Python API usage:
            >>> model_config = model()
            >>> print(model_config)
    )version)r   NAME r   r   X/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/llm/recipes/e5_340m.pymodel   s   
r   )targetr   intfloat/e5-large-v2default                dirresume_pathr   	num_nodesnum_gpus_per_nodepeft_scheme
seq_lengthmicro_batch_sizeglobal_batch_sizec	                 C   sN   t t || |||}	tjtj|||d}
|
|	_|du s%| dks%J d|	S )a  
    Create a fine-tuning recipe for E5-large (340 million) model.

    This function sets up a complete configuration for fine-tuning, including
    model, trainer, data, logging, optimization, and resumption settings.
    Only SFT is currently supported for E5 model.

    Args:
        dir (Optional[str]): Directory for saving logs and checkpoints.
        name (str): Name of the fine-tuning run.
        num_nodes (int): Number of compute nodes to use.
        num_gpus_per_node (int): Number of GPUs per node.
        peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning.
            Allowed values: 'none'/None.
        resume_path (str): Path to the NeMo checkpoint
        seq_length (int): Maximum number of tokens per microbatch.
        micro_batch_size (int): Micro batch size.
        global_batch_size (int): Global batch size.


    Returns:
        run.Partial: Partial configuration for fine-tuning.

    Examples:
        CLI usage:
            $ nemo llm finetune --factory e5_340m

        Python API usage:
            >>> recipe = finetune_recipe(name="e5_340m_finetune", num_nodes=1)
            >>> print(recipe)

    Note:
        This recipe uses the Specter dataset for fine-tuning.
    )r   r   r   NnonezE5 only supports SFT.)r   r   runConfigr   SpecterDataModuledatalower)r   r   r   r   r   r   r   r   r   recipe
datamoduler   r   r   finetune_recipe.   s   .r'   )	Nr   r   r   r   Nr   r   r   )typingr   lightning.pytorchpytorchplnemo_runr    nemo.collectionsr   nemo.collections.llm.apir   +nemo.collections.llm.recipes.bert_embeddingr   -nemo.collections.llm.recipes.finetune_defaultr   r   clifactoryr!   LightningModuler   strintPartialr'   r   r   r   r   <module>   sR   	
