o
    }oi                     @   st  d dl mZmZ d dlZd dlmZ d dlmZ d dl	m
Z
mZ ddlmZ ddlmZ dd	lmZmZmZmZ dd
lmZ dededededededededededededefddZedkr8e  Zee eej dddeZ e dd \Z!Z"Z#Z$Z%Z&Z'Z(Z)Z*Z+Z,Z-eee!e"e#e$e%e&e'e(e*e+e,e-Z.e! de$ d e% d!e& d"e' d#e" d$e# dZ/eee0d   d#ej1 d#e/ Z2eej ej3ej4ej5e!ej6ej7ej8ej9i ej:ej;ej<d%Z=ed&e%d'krd(ndej d)v d*gZ>ej?re>@e
d+d,d- eAe2*ZBeBjCe.e=e2e>d. ejDseBjd&d&d/ neBD  W d   dS W d   dS 1 s1w   Y  dS dS )0    )basenamesplitextN)pretrain_recipe)get_nmt_tokenizer)
NsysPluginPerfEnvPlugin   )parse_cli_args)slurm_executor)args_sanity_checkget_user_configsset_exp_logging_configsset_primary_perf_configs)hf_tokenizerargs	num_nodesmbsgbstp_sizepp_sizecp_sizevp_sizeep_sizeenable_cuda_graphsuse_mcore_fsdprecompute_layersactivation_offload_layersc                 C   s   t  }t|d|| j||| j||||||	|
||| j| j| jd}t|ddd| j| j	| j
| j}| jr9td|j_|S tjtdddd	|j_|jj|j_|S )
z
    NemotronH 8b pre-train recipe aimed at achieving best possible performance.

    NOTE: Use fp8 precision training with caution. It might not give desirable results.
    	pre_train)r   r   r   r   compute_dtype
fp8_recipenccl_communicator_config_pathllm	nemotronhznvidia/Nemotron-H-8B-Base-8KnullNullTokenizeri   )library
model_name
vocab_size)r   r   gpus_per_node	max_stepsr   r   r    r   tensorboardwandbwandb_prj_namewandb_job_nameuse_hf_tokenizerr   data	tokenizerrunConfigr   model)r   r   r   r   r   r   r   r   r   r   r   r   r   recipe r5   a/home/ubuntu/.local/lib/python3.10/site-packages/scripts/performance/llm/pretrain_nemotronh_8b.pyoverride_recipe_configs   sB   
r7   __main__r   r"   8b   nodes_tp_pp_cp_vp_mbs_)custom_mountscustom_env_varshf_token	nemo_home	wandb_keyT   i    )b200gb200)enable_vboostnccl_pp_comm_chunksizegpu_sm100_or_newer      )
start_stepend_step)executornameplugins)
sequentialdetach)Eos.pathr   r   nemo_runr1   )nemo.collections.llm.recipes.nemotronh_8br   3nemo.collections.nlp.modules.common.tokenizer_utilsr   nemo.lightning.run.pluginsr   r   argument_parserr	   	executorsr
   helpersr   r   r   r   utilsr   strintboolr7   __name__
parse_argsr   gpulowerkwargsr   r   r   r   r   r   r   r   r?   r   r   r   r   r4   
exp_config__file__r   exp_nameaccount	partitionlog_dirr(   
time_limitcontainer_imagerA   rC   rD   rE   rP   rR   enable_nsysappend
Experimentexpadddryrunr5   r5   r5   r6   <module>   s   	


:

."
	$