o
    }oiR                     @   s  d dl Z d dlZd dlmZ d dlZd dlmZ d dlmZ	 d dl
mZ d dlmZ dd Zed	kre Ze	jejejejd
dZe	jejejdee	jdddddd d	Ze	jddddddZe	jejdedZe	j eddddddddZ!ej"dej#dd dZ$ed d!Z%ej&d"kre'd# ej(e) e%d$Z*nej&d kre'd% ej(e+ e%d$Z*ne,d&e	j-ddd'Z.ej/e*e$eee!e.d( e'd) dS dS )*    N)	dataclass)OptimizerConfig)	lightning)llm)get_nmt_tokenizerc                  C   s   t jdd} | jdtdd | jdtddd	 | jd
tddd	 | jdtddd	 | jdtddd	 | jdtddd	 | jdtdd |  S )Nz-Pretraining a small BERT model using NeMo 2.0)descriptionz--experiment_dirz-directory to write results and checkpoints to)typehelpz	--devices   znumber of devices)r   defaultr	   z--max_steps   z--mbszmicro batch sizez	--tp_sizeztensor parallel sizez	--pp_sizezpipeline parallel sizez--typehuggingface)r   r   )argparseArgumentParseradd_argumentstrint
parse_args)parser r   Z/home/ubuntu/.local/lib/python3.10/site-packages/tests/collections/llm/bert_pretraining.pyget_args   s   r   __main__log_all)tensor_model_parallel_sizepipeline_model_parallel_sizepipeline_dtypeckpt_load_strictnessgpuz
bf16-mixed)	precisionr
      )	devices	max_stepsacceleratorstrategypluginslog_every_n_stepslimit_val_batchesval_check_intervalnum_sanity_val_stepsTreduced_train_loss)	save_lastmonitor
save_top_ksave_on_train_epoch_endsave_optim_on_train_endF)log_diruse_datetime_versionckptadamg-C6?g\(\?g      ?)	optimizerlr
adam_beta2use_distributed_optimizer	clip_gradbf16)configi      )
seq_lengthmicro_batch_sizeglobal_batch_sizenum_workersmegatronBertWordPieceLowerCaser   z Init HuggingFace Bert Base Model)	tokenizerzInit Megatron Bert Base ModelzUnknown type.)resume_if_existsresume_ignore_no_checkpoint)modeldatatrainerlogoptimresumezBert Pretraining Succeeded)0r   osdataclassesr   torchmegatron.core.optimizerr   nemor   nlnemo.collectionsr   3nemo.collections.nlp.modules.common.tokenizer_utilsr   r   __name__argsMegatronStrategytp_sizepp_sizebfloat16r$   Trainerr!   r"   MegatronMixedPrecisionrG   ModelCheckpointr2   
NeMoLoggerexperiment_dirloggerMegatronOptimizerModuler3   BERTMockDataModulembsrF   rB   r   print	BertModelHuggingFaceBertBaseConfigrE   MegatronBertBaseConfig
ValueError
AutoResumerJ   pretrainr   r   r   r   <module>   s   



