o
    }oi                     @   s  d dl mZmZ d dlZd dlmZ d dlmZ d dl	m
Z
mZmZ ddlmZ ddlmZ dd	lmZmZmZmZmZ d
edededededededededefddZedkrEe  Zee eej dddeZ e dd \
Z!Z"Z#Z$Z%Z&Z'Z(Z)Z*eee!e"e#e$e%e&e'e(e*
Z+e! de$ de% de& de' de" d e# dZ,eee-d   dej. de, Z/eej ej0ej1ej2e!ej3ej4ej5ej6i ej7ej8ej9ej:rd!ndd"Z;ed#e%d$krd%ndej d&v d'gZ<ej=re<>ed(d)d* ej?rej@dusJ e<>e
ej@d+ eAe/*ZBeBjCe+e;e/e<d, ejDs&eBjd#d#d- neBD  W d   dS W d   dS 1 s>w   Y  dS dS ).    )basenamesplitextN)pretrain_recipe)get_nmt_tokenizer)MemoryProfilePlugin
NsysPluginPerfEnvPlugin   )parse_cli_args)slurm_executor)args_sanity_checkget_user_configsloggingset_exp_logging_configsset_primary_perf_configsargs	num_nodesmbsgbstp_sizepp_sizecp_sizevp_sizeep_sizeenable_cuda_graphsc
                 C   s   t dd}
t|
d|| j||| j||||||	| j| j| j| j| jd}
t	|
ddd| j
| j| j| j}
| jr9td tjtdd	d
d|
j_|
jj|
j_|
S )z
    nemotron3 8b pre-train recipe aimed at achieving best possible performance.

    NOTE: Use fp8 precision training with caution. It might not give desirable results.
    T)performance_mode	pre_train)r   compute_dtype
fp8_recipenccl_communicator_config_pathuse_user_buffer_registration	use_sharpllm	nemotron3zJHuggingFace tokenizer not supported for Nemotron3 8B. Using NullTokenizer.nullNullTokenizeri  )library
model_name
vocab_size)r   r   gpus_per_node	max_stepsr   r   r   r    r!   r   tensorboardwandbwandb_prj_namewandb_job_nameuse_hf_tokenizerr   warningrunConfigr   data	tokenizermodel)r   r   r   r   r   r   r   r   r   r   recipe r7   a/home/ubuntu/.local/lib/python3.10/site-packages/scripts/performance/llm/pretrain_nemotron3_8b.pyoverride_recipe_configs   s>   


r9   __main__r   r#   8b
   nodes_tp_pp_cp_vp_mbs_sharp)custom_mountscustom_env_varshf_token	nemo_home	wandb_keynetworkT   i    )b200gb200)enable_vboostnccl_pp_comm_chunksizegpu_sm100_or_newer      )
start_stepend_step)dir)executornameplugins)
sequentialdetach)Eos.pathr   r   nemo_runr1   )nemo.collections.llm.recipes.nemotron3_8br   3nemo.collections.nlp.modules.common.tokenizer_utilsr   nemo.lightning.run.pluginsr   r   r   argument_parserr
   	executorsr   helpersr   r   r   r   r   strintboolr9   __name__
parse_argsr   gpulowerkwargsr   r   r   r   r   r   r   r   rA   r   r6   
exp_config__file__r   exp_nameaccount	partitionlog_dirr)   
time_limitcontainer_imagerD   rF   rG   rH   r!   rU   rW   enable_nsysappendenable_memory_profilememory_profile_out_path
Experimentexpadddryrunr7   r7   r7   r8   <module>   s   	


5
 ."
	$