o
    }oi8                     @   s  d dl mZmZ d dlmZ d dlZd dlmZ d dl	m
Z
 d dlmZmZmZ ddlmZ dd	lmZ dd
lmZmZmZmZ ddlmZ dedededededededededee defddZedkr[e  Z ee  ee j!" ddde Z#e#dd \
Z$Z%Z&Z'Z(Z)Z*Z+Z,Z-ee e$e%e&e'e(e)e*e+e,e-Z.e$ de' de( d e) d!e* d"e+ d#e, d$e% d%e& dZ/eee0d   d$e j1 d$e/ Z2ee j!" e j3e j4e j5e$e j6e j7e j8e j9i e j:e j;e j<e j=rd&ndd'Z>ed(e(d)krd*nde j!" d+v d,gZ?e j@re?Aed-d.d/ e jBr!e jCdusJ e?Aee jCd0 eDe2*ZEeEjFe.e>e2e?d1 e jGs<eEjd(d(d2 neEG  W d   dS W d   dS 1 sTw   Y  dS dS )3    )basenamesplitext)OptionalN)pretrain_recipe)get_nmt_tokenizer)MemoryProfilePlugin
NsysPluginPerfEnvPlugin   )parse_cli_args)slurm_executor)args_sanity_checkget_user_configsset_exp_logging_configsset_primary_perf_configs)hf_tokenizerargs	num_nodesmbsgbstp_sizepp_sizecp_sizevp_sizeep_sizeetp_sizeenable_cuda_graphsc                 C   s   t dd}t|d|| j||| j||||||	|
| j| j| j| j| jd}t	|ddd| j
| j| j| j}| jr=td|j_|S tjtdd	d
d|j_|jj|j_|S )z
    mixtral 8x7b pre-train recipe aimed at achieving best possible performance.

    NOTE: Use fp8 precision training with caution. It might not give desirable results.
    T)performance_mode	pre_train)use_user_buffer_registration	use_sharpcompute_dtype
fp8_recipenccl_communicator_config_pathllmmixtralzmistralai/Mixtral-8x7B-v0.1nullNullTokenizeri }  )library
model_name
vocab_size)r   r   gpus_per_node	max_stepsr   r    r!   r"   r#   r   tensorboardwandbwandb_prj_namewandb_job_nameuse_hf_tokenizerr   data	tokenizerrunConfigr   model)r   r   r   r   r   r   r   r   r   r   r   recipe r8   a/home/ubuntu/.local/lib/python3.10/site-packages/scripts/performance/llm/pretrain_mixtral_8x7b.pyoverride_recipe_configs   sB   

r:   __main__r   r%   8x7b
   nodes_tp_pp_cp_vp_ep_etp_mbs_sharp)custom_mountscustom_env_varshf_token	nemo_home	wandb_keynetworkT   i    )b200gb200)enable_vboostnccl_pp_comm_chunksizegpu_sm100_or_newer      )
start_stepend_step)dir)executornameplugins)
sequentialdetach)Hos.pathr   r   typingr   nemo_runr4   )nemo.collections.llm.recipes.mixtral_8x7br   3nemo.collections.nlp.modules.common.tokenizer_utilsr   nemo.lightning.run.pluginsr   r   r	   argument_parserr   	executorsr   helpersr   r   r   r   utilsr   strintboolr:   __name__
parse_argsr   gpulowerkwargsr   r   r   r   r   r   r   r   r   r   r7   
exp_config__file__r!   exp_nameaccount	partitionlog_dirr+   
time_limitcontainer_imagerG   rI   rJ   rK   r    rX   rZ   enable_nsysappendenable_memory_profilememory_profile_out_path
Experimentexpadddryrunr8   r8   r8   r9   <module>   s   	


8
 8"
	$