o
    }oi                     @   s~   d dl Zd dlZd dlmZmZ d dlmZ d dlm	Z	 d dl
mZmZ d dlmZ d dlmZ d dlmZ G d	d
 d
ZdS )    N)finetunepretrain)MockDataModule)SquadDataModule)Llama3Config8B
LlamaModel)LoRA	llama3_8b)Trainerc                   @   sr   e Zd Zejdddd Zdd Zdd Zd	d
 Zdd Z	ej
dg ddd Zdd Zdd Zdd ZdS )TestLlama3_8Bclass)scopec                 C   s   t S Nr	   )self r   `/home/ubuntu/.local/lib/python3.10/site-packages/tests/collections/llm/recipes/test_llama3_8b.pyrecipe_module   s   zTestLlama3_8B.recipe_modulec                 C   sL   |  }t|tjsJ |jtksJ t|jtjsJ |jjtks$J d S r   )model
isinstancerunConfig__fn_or_cls__r   configr   )r   r   model_configr   r   r   
test_model    s
   zTestLlama3_8B.test_modelc                 C   s  |  }t|tjsJ |jtksJ |jdksJ |jdks!J |jdks(J |j	dks/J t|j
tjs8J |j
jjdksAJ |j
jdksIJ |j
jdksQJ |j
jd u sYJ |j
jd u saJ |j
jdksiJ |j
jdu sqJ |j
jdu syJ |j
jdu sJ |j
jdu sJ |jdksJ |jd	ksJ |jd
ksJ |jdksJ |jdu sJ |jdksJ t|jtjsJ |jjjdksJ d S )Ngpu      i{ MegatronStrategy   FT2       
   i  MegatronMixedPrecision)trainerr   r   r   r   r   acceleratordevices	num_nodes	max_stepsstrategy__name__tensor_model_parallel_sizepipeline_model_parallel_sizepipeline_dtype$virtual_pipeline_model_parallel_sizecontext_parallel_sizesequence_parallelgradient_as_bucket_viewckpt_async_saveckpt_parallel_loadaccumulate_grad_batcheslimit_test_batcheslimit_val_batcheslog_every_n_stepsuse_distributed_samplerval_check_intervalpluginsr   r   trainer_configr   r   r   test_trainer'   s4   zTestLlama3_8B.test_trainerc                 C   s   |  }t|tjsJ |jtksJ t|jtjsJ |jjtks$J t|j	tjs-J |j	jt
ks5J t|jtjs>J |jjtksFJ |jjdksNJ |jjdksVJ d S )Ni    i   )pretrain_reciper   r   Partialr   r   r   r   r   r%   r   datar   
seq_lengthglobal_batch_sizer   r   reciper   r   r   test_pretrain_recipeI   s   z"TestLlama3_8B.test_pretrain_recipec                 C   s   |  }t|tjsJ |jtksJ t|jtjsJ |jjtks$J t|j	tjs-J |j	jt
ks5J t|jtjs>J |jjtksFJ |jjdksNJ |jjdksVJ t|jtjs_J |jjtksgJ d S )Ni      )finetune_reciper   r   r@   r   r   r   r   r   r%   r   rA   r   rB   rC   peftr   rD   r   r   r   test_finetune_recipeV   s   z"TestLlama3_8B.test_finetune_recipeznum_nodes,num_gpus_per_node))r   r   )r       )rK   r    c                 C   s2   |j ||d}|jj|ksJ |jj|ksJ d S )N)r(   num_gpus_per_node)r?   r%   r(   r'   )r   r   r(   rL   rE   r   r   r   2test_pretrain_recipe_with_different_configurationse   s   z@TestLlama3_8B.test_pretrain_recipe_with_different_configurationsc                 C   s*   |j dd}tdd |jjD sJ d S )NT)performance_modec                 s   s    | ]	}|j jd kV  qdS )MegatronCommOverlapCallbackN)r   r+   ).0cbr   r   r   	<genexpr>m   s    zHTestLlama3_8B.test_pretrain_performance_optimizations.<locals>.<genexpr>)r?   anyr%   	callbacksrD   r   r   r   'test_pretrain_performance_optimizationsk   s   z5TestLlama3_8B.test_pretrain_performance_optimizationsc                 C   sV   |j ddddd}|jjdksJ |jjdksJ |jjdks!J |jjdu s)J d S )Nr    rK   T)tensor_parallelismpipeline_parallelismcontext_parallelismsequence_parallelism)r%   r*   r,   r-   r0   r1   r<   r   r   r    test_trainer_parallelism_optionso   s   z.TestLlama3_8B.test_trainer_parallelism_optionsc                 C   s<   |  }|j}|jdksJ |jdksJ |jdksJ d S )Nr"   i   )r   r   
num_layershidden_sizenum_attention_heads)r   r   r   llama_configr   r   r   test_model_config_parametersx   s
   z*TestLlama3_8B.test_model_config_parametersN)r+   
__module____qualname__pytestfixturer   r   r>   rF   rJ   markparametrizerM   rU   rZ   r_   r   r   r   r   r      s    

"
	r   )nemo_runr   rb   nemo.collections.llm.apir   r   "nemo.collections.llm.gpt.data.mockr   #nemo.collections.llm.gpt.data.squadr   $nemo.collections.llm.gpt.model.llamar   r   nemo.collections.llm.peft.lorar   nemo.collections.llm.recipesr
   nemo.lightningr   r   r   r   r   r   <module>   s   