import argparse

import torch
from lightning.pytorch.loggers import WandbLogger
from megatron.core.optimizer import OptimizerConfig

from nemo import lightning as nl
from nemo.collections import llm
from nemo.collections.llm.api import finetune
from nemo.collections.llm.t5.data import SquadDataModule
from nemo.collections.nlp.modules.common.tokenizer_utils import get_nmt_tokenizer
from nemo.lightning import NeMoLogger
from nemo.lightning.pytorch.callbacks import ModelCheckpoint
from nemo.lightning.pytorch.optim.megatron import MegatronOptimizerModule


def get_args():
    parser = argparse.ArgumentParser(description='Train a small T5 model using NeMo 2.0')
    parser.add_argument('--devices', type=int, help="Number of devices to use for training")
    parser.add_argument('--max-steps', type=int, help="Number of steps to train for")
    parser.add_argument('--peft', type=str, default='none', help="none | lora")
    parser.add_argument('--data-dir', type=str, default=None, help="directory to finetuning data")
    parser.add_argument('--experiment-dir', type=str, help="directory to write results and checkpoints to")
    parser.add_argument('--experiment-name', type=str, help="name of experiment")
    parser.add_argument('--wandb-project', type=str, default=None, help="wandb project name")
    parser.add_argument('--checkpoint-path', type=str, help="Path to checkpoint dir")
    parser.add_argument('--index-mapping-dir', type=str, default=None, help="directory to write index mappings to")

    return parser.parse_args()
if __name__ == '__main__':
    args = get_args()

    # T5 sentinel tokens used for span corruption / masking.
    special_tokens = {}
    special_tokens['additional_special_tokens'] = [f'<extra_id_{i}>' for i in range(100)]
    tokenizer = get_nmt_tokenizer(
        "megatron",
        "BertWordPieceCase",
        special_tokens=special_tokens,
    )

    # NOTE: the exact numeric hyperparameters below (sequence lengths, batch
    # sizes, checkpoint/validation intervals, learning rate) are assumptions
    # following NeMo's small-T5 finetuning recipe.
    data = SquadDataModule(
        dataset_root=args.data_dir,
        seq_length=512,
        seq_length_dec=128,
        micro_batch_size=16,
        global_batch_size=128,
        tokenizer=tokenizer,
        num_workers=4,
    )

    # Small (T5-base-sized) encoder-decoder configuration.
    t5_config = llm.t5.model.t5.T5Config(
        num_layers=12,
        encoder_num_layers=12,
        hidden_size=768,
        ffn_hidden_size=3072,
        num_attention_heads=12,
        kv_channels=64,
        init_method_std=0.015,
        hidden_dropout=0.1,
        attention_dropout=0.1,
        layernorm_epsilon=1e-5,
        make_vocab_size_divisible_by=128,
        max_position_embeddings=512,
    )
    model = llm.t5.model.t5.T5Model(t5_config, tokenizer=data.tokenizer)

    strategy = nl.MegatronStrategy(
        tensor_model_parallel_size=1,
        pipeline_model_parallel_size=1,
        pipeline_dtype=torch.float32,
        ckpt_load_optimizer=False,
        ckpt_load_strictness="log_all",
    )
    checkpoint_callback = ModelCheckpoint(every_n_train_steps=5000)
    callbacks = [checkpoint_callback]

    # Resume from the pretrained checkpoint passed on the command line.
    resume = nl.AutoResume(
        resume_if_exists=True,
        resume_ignore_no_checkpoint=True,
        resume_from_path=args.checkpoint_path,
    )

    opt_config = OptimizerConfig(
        optimizer='adam',
        lr=2.0e-5,
        use_distributed_optimizer=False,
        bf16=True,
        weight_decay=0.1,
    )
    opt = MegatronOptimizerModule(config=opt_config)

    # Optional parameter-efficient finetuning via LoRA.
    if args.peft == 'lora':
        peft = llm.peft.LoRA()
    else:
        peft = None

    trainer = nl.Trainer(
        devices=args.devices,
        max_steps=args.max_steps,
        accelerator='gpu',
        strategy=strategy,
        callbacks=callbacks,
        log_every_n_steps=1,
        limit_val_batches=2,
        val_check_interval=50,
        plugins=nl.MegatronMixedPrecision(precision="bf16-mixed"),
    )

    if args.wandb_project is not None:
        wandb_logger = WandbLogger(
            name=args.experiment_name,
            project=args.wandb_project,
            log_model="all",
        )
    else:
        wandb_logger = None
    nemo_logger = NeMoLogger(
        name=args.experiment_name,
        use_datetime_version=False,
        log_dir=args.experiment_dir,
        wandb=wandb_logger,
    )

    finetune(
        model=model,
        resume=resume,
        data=data,
        trainer=trainer,
        peft=peft,
        log=nemo_logger,
        optim=opt,
    )