o
    }oiS
                  	   @   sJ  d dl Zd dlmZ d dlmZ d dlmZ dZd%de	d	e	d
ej
fddZedkrd dlZe Zejddd ejdedg dd ejddd ejdddgd ejde	dd e ZejjejddejejdZdej_dej_ejejZej eedejj!ej eejd d!e_"eejj#ejjd"Z$ejee$d#d$ dS dS )&    N)llm)AutoTokenizer)SquadHFDataModulez#/home/TestData/lite/hf_cache/squad/      nodesdevicesreturnc                 C   s    dddd}t j|d|d}|S )N10)TORCH_NCCL_AVOID_RECORD_STREAMSNCCL_NVLS_ENABLENVTE_FUSED_ATTNtorchrun)ntasks_per_nodelauncherenv_vars)runLocalExecutor)r   r   r   executor r   ]/home/ubuntu/.local/lib/python3.10/site-packages/tests/collections/llm/hf/pretrain_nemorun.pylocal_executor_torchrun   s   r   __main__z--modelzmeta-llama/Llama-3.2-1B)defaultz
--strategyauto)r   ddpfsdp)typer   choicesz	--devicesz--acceleratorgpu)r   r   z--max-stepsd   )r   r   pt)
model_namename	num_nodesnum_gpus_per_node	max_stepsg        ztrain[:100])pretrained_model_name)path_or_datasetsplitpad_token_id	tokenizer)r   r   T)r   direct)r   r   )%nemo_runr   nemo.collectionsr   =nemo.collections.common.tokenizers.huggingface.auto_tokenizerr   (nemo.collections.llm.gpt.data.hf_datasetr   	DATA_PATHintr   r   __name__argparseArgumentParserparseradd_argumentstr
parse_argsargshf_auto_model_for_causal_lmpretrain_recipemodelr   r'   recipetrainerval_check_interval
max_epochsHFAutoModelForCausalLMconfigure_tokenizerr,   Configeos_token_iddatar%   r   r   r   r   r   <module>   sF   