o
    }oi                     @   st  d dl mZmZ d dlZd dlmZ d dlmZ d dl	m
Z
mZ ddlmZ ddlmZ dd	lmZmZmZmZ dd
lmZ dededededededededededededefddZedkr8e  Zee eej dddeZ e dd \Z!Z"Z#Z$Z%Z&Z'Z(Z)Z*Z+Z,Z-eee!e"e#e$e%e&e'e(e*e+e,e-Z.e! de$ d e% d!e& d"e' d#e" d$e# dZ/eee0d   d#ej1 d#e/ Z2eej ej3ej4ej5e!ej6ej7ej8ej9i ej:ej;ej<d%Z=ed&e%d'krd(ndej d)v d*gZ>ej?re>@e
d+d,d- eAe2*ZBeBjCe.e=e2e>d. ejDseBjd&d&d/ neBD  W d   dS W d   dS 1 s1w   Y  dS dS )0    )basenamesplitextN)pretrain_recipe)get_nmt_tokenizer)
NsysPluginPerfEnvPlugin   )parse_cli_args)slurm_executor)args_sanity_checkget_user_configsset_exp_logging_configsset_primary_perf_configs)hf_tokenizerargs	num_nodesmbsgbstp_sizepp_sizecp_sizevp_sizeep_sizeenable_cuda_graphsuse_mcore_fsdprecompute_layersactivation_offload_layersc                 C   s   t dd}t|d|| j||| j||||||	|
||| j| j| jd}t|ddd| j| j	| j
| j}| jr;td|j_|S tjtdd	d
d|j_|jj|j_|S )z
    NemotronH 56b pre-train recipe aimed at achieving best possible performance.

    NOTE: Use fp8 precision training with caution. It might not give desirable results.
    T)performance_mode	pre_train)r   r   r   r   compute_dtype
fp8_recipenccl_communicator_config_pathllm	nemotronhznvidia/Nemotron-H-8B-Base-8KnullNullTokenizeri   )library
model_name
vocab_size)r   r   gpus_per_node	max_stepsr   r    r!   r   tensorboardwandbwandb_prj_namewandb_job_nameuse_hf_tokenizerr   data	tokenizerrunConfigr   model)r   r   r   r   r   r   r   r   r   r   r   r   r   recipe r6   b/home/ubuntu/.local/lib/python3.10/site-packages/scripts/performance/llm/pretrain_nemotronh_56b.pyoverride_recipe_configs   sB   

r8   __main__r   r#   56b   nodes_tp_pp_cp_vp_mbs_)custom_mountscustom_env_varshf_token	nemo_home	wandb_keyT   i    )b200gb200)enable_vboostnccl_pp_comm_chunksizegpu_sm100_or_newer      )
start_stepend_step)executornameplugins)
sequentialdetach)Eos.pathr   r   nemo_runr2   *nemo.collections.llm.recipes.nemotronh_56br   3nemo.collections.nlp.modules.common.tokenizer_utilsr   nemo.lightning.run.pluginsr   r   argument_parserr	   	executorsr
   helpersr   r   r   r   utilsr   strintboolr8   __name__
parse_argsr   gpulowerkwargsr   r   r   r   r   r   r   r   r@   r   r   r   r   r5   
exp_config__file__r   exp_nameaccount	partitionlog_dirr)   
time_limitcontainer_imagerB   rD   rE   rF   rQ   rS   enable_nsysappend
Experimentexpadddryrunr6   r6   r6   r7   <module>   s   	


:

."
	$