o
    }oio                     @   s   d dl mZmZ d dlZd dlmZ d dlmZ d dl	m
Z
mZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZ d
dlmZ d
dlmZ d
dlmZmZmZ d
dlmZm Z m!Z! dZ"dZ#dZ$de%de&de&de&de&de&de&de&de&de'fddZ(e)dkre * Z+ee+ ee+j,- e+j.dde+Z/e/dd  \
Z0Z1Z2Z3Z4Z5Z6Z7Z8Z9e(e+e0e1e2e3e4e5e6e7e9
Z:e0 d!e3 d"e4 d#e5 d$e6 d%e1 d&e2 dZ;e+j. d%eee<d   d%e+j= d%e; Z>ee+j,- e+j?e+j@e+jAe0e+jBe+jCe+jDe+jEi e+jFe+jGe+jHe+jIrd'ndd(ZJede4d)krd*nde+j,- d+v d,gZKe+jLr$eKMed d-dd. e+jNr9e+jOdus0J eKMee+jOd/ ePe>EZQe#sZe+jFdusLJ d0eQjRe eJe d1e" d2  eQjRe:eJe>eKd3 e+jSsoeQjddd4 neQS  W d   dS W d   dS 1 sw   Y  dS dS )5    )basenamesplitextN)MockDataModule)SquadDataModule)finetune_recipemodel)get_nmt_tokenizer)"MegatronEnableExperimentalCallback)MegatronTokenDropCallback)MemoryProfilePlugin
NsysPluginPerfEnvPlugin   )parse_cli_args)slurm_executor)args_sanity_checkget_user_configsset_primary_perf_configs)hf_tokenizerimport_ckpt_experimentisfile_train_pack_metadatazdeepseek-ai/DeepSeek-V3-BaseTargs	num_nodesmbsgbstp_sizepp_sizecp_sizevp_sizeep_sizeenable_cuda_graphsc
                 C   s  | j dkrdn| j }
t|
ddd}tjtd|dd|_t|jd	r'|jjd
u r+g |j_| j	
 dv rBd|jj_d|jj_d|jj_nd|jj_d|jj_trX|jjtt d|jj_d|jj_|jjtt t||
|| j||| j||||||	| j| j| j| j| jd}d
|j_| j rt!t"|j_#ntjt$dddd|j_#|jj#|j_#|jj%t&krt't"|jsd|j_(d|jj_)d|jj_*d|jj_+d|jj,_-d|jj,_.d
|jj,_/d
|jj,_0d|jj,_1|S )z
    deepseek v3 finetune recipe aimed at achieving best possible performance.

    NOTE: Use fp8 precision training with caution. It might not give desirable results.
    sftnoneFT)peft_schemepacked_sequenceperformance_modei      )
seq_lengthglobal_batch_sizemicro_batch_size	callbacksN)h100flexalltoall)r    compute_dtype
fp8_recipenccl_communicator_config_pathuse_user_buffer_registration	use_sharpnullNullTokenizeri  )library
model_name
vocab_sizefulluniform)2
finetuningr   runConfigr   datahasattrtrainerr*   gpulowerr   configmoe_token_dispatcher_typemoe_enable_deepepmoe_shared_expert_overlapUSE_TOKEN_DROPappendr
   moe_permute_fusionapply_rope_fusionr	   r   gpus_per_node	max_stepsr.   r/   r0   r1   r2   resumerestore_configuse_hf_tokenizerr   HF_MODEL_URI	tokenizerr   __fn_or_cls__r   r   force_redownloadrecompute_granularityrecompute_methodrecompute_num_layersstrategy"account_for_loss_in_pipeline_split'account_for_embedding_in_pipeline_split"num_layers_in_first_pipeline_stage!num_layers_in_last_pipeline_stagesequence_parallel)r   r   r   r   r   r   r   r   r   r    finetuning_schemerecipe r^   `/home/ubuntu/.local/lib/python3.10/site-packages/scripts/performance/llm/finetune_deepseek_v3.pyoverride_recipe_configs*   sl   














r`   __main__deepseekv3
   nodes_tp_pp_cp_vp_mbs_sharp)custom_mountscustom_env_varshf_token	nemo_home	wandb_keynetworkr&   i    )b200gb200)enable_vboostnccl_pp_comm_chunksizegpu_sm100_or_newer   )
start_stepend_step	gen_shape)dirz>HF token is required for importing checkpoint from HuggingFacezhf://)source)executornameplugins)
sequentialdetach)Tos.pathr   r   nemo_runr;   "nemo.collections.llm.gpt.data.mockr   #nemo.collections.llm.gpt.data.squadr   (nemo.collections.llm.recipes.deepseek_v3r   r   3nemo.collections.nlp.modules.common.tokenizer_utilsr   Fnemo.lightning.pytorch.callbacks.megatron_enable_experimental_callbackr	   /nemo.lightning.pytorch.callbacks.moe_token_dropr
   nemo.lightning.run.pluginsr   r   r   argument_parserr   	executorsr   helpersr   r   r   utilsr   r   r   rO   SKIP_IMPORTrF   strintboolr`   __name__
parse_argsr   r@   rA   r:   kwargsr   r   r   r   r   r   r   r   ri   r    r]   
exp_config__file__r.   exp_nameaccount	partitionlog_dirrJ   
time_limitcontainer_imagerl   rn   ro   rp   r2   r}   r   enable_nsysrG   enable_memory_profilememory_profile_out_path
Experimentexpadddryrunr^   r^   r^   r_   <module>   s   	


_
 .*
$