# Train a small GPT model using NeMo 2.0
# (tests/collections/llm/megatron_gpt_pretraining.py).

import argparse

import torch
from lightning.pytorch.loggers import TensorBoardLogger
from megatron.core.optimizer import OptimizerConfig

from nemo import lightning as nl
from nemo.collections import llm
from nemo.collections.llm.api import train
from nemo.collections.llm.gpt.data import PreTrainingDataModule
from nemo.collections.nlp.modules.common.tokenizer_utils import get_nmt_tokenizer
from nemo.lightning import AutoResume, NeMoLogger
from nemo.lightning.pytorch.callbacks import ModelCheckpoint, ParameterDebugger
from nemo.lightning.pytorch.optim.megatron import MegatronOptimizerModule
from tests.collections.llm.common import AssertOptimizerParamGroupsHaveAtLeastTwoWeightDecays
lmZmZ d dlmZ d dlmZ dd Zedkre ZdZeddejej dZ!eej"ddde!dZ#e
j$ddddedddddej%dZ&e
j'e&e#j!dZ(e) Z*ed d!d"Z+d#ej,fd$d%Z-ee-ej.e-ej/d&d'gd(Z0e+e0e gZ1g Z2ed)d*Z3e24e3 ed+d,d-d.d!d/Z5ee5d0Z6ej7ej8ej9d1e*e2e1d2d3ej:d4d5d6	Z;eej<d7Z=ed!d!d8Z>ee(e#e;e=e>d9e6d: dS dS );    N)TensorBoardLogger)OptimizerConfig)	lightning)llm)train)PreTrainingDataModule)get_nmt_tokenizer)
AutoResume
NeMoLogger)ModelCheckpointParameterDebugger)MegatronOptimizerModule)4AssertOptimizerParamGroupsHaveAtLeastTwoWeightDecaysc                  C   s   t jdd} | jdtdd | jdtdd | jdtd	d | jd
tdd | jdtdd | jdtdd | jdtdd | jddddd |  S )Nz&Train a small GPT model using NeMo 2.0)descriptionz	--devicesz%Number of devices to use for training)typehelpz--max-stepszNumber of steps to train forz--experiment-dirz-directory to write results and checkpoints toz--data-pathzPath to data filez--vocab-pathzPath to vocab filez--merges-pathzPath to merges filez--index-mapping-dirz$directory to write index mappings toz--no-masked-softmax-fusionstore_falsezDisable fusion of softmax.masked_softmax_fusion)actionr   dest)argparseArgumentParseradd_argumentintstr
parse_args)parser r   b/home/ubuntu/.local/lib/python3.10/site-packages/tests/collections/llm/megatron_gpt_pretraining.pyget_args#   s   r   __main__i   megatronGPT2BPETokenizer)
vocab_filemerges_file    i  )paths
seq_lengthglobal_batch_sizeseed	tokenizer   i   i   gZd;O?g?gh㈵>   )
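    # Note (assumption, not stated in the original test): --data-path is expected to
    # be a Megatron indexed-dataset prefix (the .bin/.idx pair produced by the
    # preprocessing tools), commonly a path ending in "_text_document".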
    gpt_config = llm.GPTConfig(
        num_layers=12,
        hidden_size=768,
        ffn_hidden_size=3072,
        num_attention_heads=12,
        seq_length=seq_length,
        init_method_std=0.023,
        hidden_dropout=0.1,
        attention_dropout=0.1,
        layernorm_epsilon=1e-5,
        make_vocab_size_divisible_by=128,
        masked_softmax_fusion=args.masked_softmax_fusion,
    )
    model = llm.GPTModel(gpt_config, tokenizer=data.tokenizer)
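    # For scale orientation (not part of the original test): 12 layers, 768 hidden
    # units, and 12 attention heads is roughly the GPT-2 "small" configuration,
    # on the order of ~124M parameters.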
    strategy = nl.MegatronStrategy()
    checkpoint_callback = ModelCheckpoint(
        every_n_train_steps=5000,
        save_optim_on_train_end=True,
    )

    def create_verify_precision(precision: torch.dtype):
        def verify_precision(tensor: torch.Tensor) -> None:
            assert tensor.dtype == precision

        return verify_precision

    debugger = ParameterDebugger(
        param_fn=create_verify_precision(torch.bfloat16),
        grad_fn=create_verify_precision(torch.float32),
        log_on_hooks=["on_train_start", "on_train_end"],
    )
    callbacks = [checkpoint_callback, debugger, AssertOptimizerParamGroupsHaveAtLeastTwoWeightDecays()]
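    # A minimal sketch (not in the original test) of what the debugger's closures assert:
    #
    #   check = create_verify_precision(torch.bfloat16)
    #   check(torch.zeros(1, dtype=torch.bfloat16))  # passes silently
    #   check(torch.zeros(1, dtype=torch.float32))   # raises AssertionError
    #
    # With bf16 mixed precision, parameters are expected to be bf16 while the
    # optimizer's main gradients are kept in fp32, which is why param_fn and
    # grad_fn check different dtypes.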
    loggers = []
    tensorboard_logger = TensorBoardLogger(
        save_dir='dummy',  # placeholder; NeMoLogger below controls the actual log directory
    )
    loggers.append(tensorboard_logger)

    opt_config = OptimizerConfig(
        optimizer='adam',
        lr=6e-4,
        min_lr=6e-5,
        use_distributed_optimizer=False,
        bf16=True,
    )
    opt = MegatronOptimizerModule(config=opt_config)
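    # Design note (assumption, not from the original test): with
    # use_distributed_optimizer=False, every rank holds a full replica of the Adam
    # states; Megatron's distributed optimizer would instead shard those states
    # across data-parallel ranks.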
    trainer = nl.Trainer(
        devices=args.devices,
        max_steps=args.max_steps,
        accelerator="gpu",
        strategy=strategy,
        logger=loggers,
        callbacks=callbacks,
        log_every_n_steps=1,
        limit_val_batches=2,
        plugins=nl.MegatronMixedPrecision(precision="bf16-mixed"),
    )

    nemo_logger = NeMoLogger(
        log_dir=args.experiment_dir,
    )
    resume = AutoResume(
        resume_if_exists=True,
        resume_ignore_no_checkpoint=True,
    )

    train(
        model=model,
        data=data,
        trainer=trainer,
        log=nemo_logger,
        resume=resume,
        tokenizer='data',  # reuse the tokenizer attached to the data module
        optim=opt,
    )