o
    }oi                     @   s   d dl Zd dlZd dlZd dlmZmZ d dlmZ d dl	m
Z
 d dlmZmZ d dlmZ d dlmZ d dlmZmZ d d	lmZ d d
lmZ G dd dZdS )    N)finetunepretrain)MockDataModule)SquadDataModule)Llama3Config70B
LlamaModel)LoRA
llama3_70b)
AutoResumeTrainer)MegatronCommOverlapCallback)TimingCallbackc                   @   sr   e Zd Zejdddd Zdd Zdd Zd	d
 Zdd Z	ej
dg ddd Zdd Zdd Zdd ZdS )TestLlama3_70Bclass)scopec                 C   s   t S Nr	   )self r   a/home/ubuntu/.local/lib/python3.10/site-packages/tests/collections/llm/recipes/test_llama3_70b.pyrecipe_module   s   zTestLlama3_70B.recipe_modulec                 C   sL   |  }t|tjsJ |jtksJ t|jtjsJ |jjtks$J d S r   )model
isinstancerunConfig__fn_or_cls__r   configr   )r   r   model_configr   r   r   
test_model#   s
   zTestLlama3_70B.test_modelc                 C   s   |  }t|tjsJ |jtksJ |jdksJ |jdks!J |jdks(J t|j	tjs1J |j	jj
dks:J |j	jdksBJ |j	jdksJJ |j	jtjksSJ |j	jdks[J |j	jdkscJ |j	jdu skJ d S )Ngpu      MegatronStrategy      T)trainerr   r   r   r   r   acceleratordevices	num_nodesstrategy__name__tensor_model_parallel_sizepipeline_model_parallel_sizepipeline_dtypetorchbfloat16$virtual_pipeline_model_parallel_sizecontext_parallel_sizesequence_parallelr   r   trainer_configr   r   r   test_trainer*   s   zTestLlama3_70B.test_trainerc                 C   s   |  }t|tjsJ |jtksJ t|jtjsJ |jjtks$J t|j	tjs-J |j	jt
ks5J t|jtjs>J |jjtksFJ |jjdksNJ |jjdksVJ |jjdks^J d S )N    i      )pretrain_reciper   r   Partialr   r   r   r   r   r%   r   datar   
seq_lengthglobal_batch_sizemicro_batch_sizer   r   reciper   r   r   test_pretrain_recipe<   s   z#TestLlama3_70B.test_pretrain_recipec                 C   s   |  }t|tjsJ |jtksJ t|jtjsJ |jjtks$J t|j	tjs-J |j	jt
ks5J t|jtjs>J |jjtksFJ |jjdksNJ |jjdksVJ |jjdks^J t|jtjsgJ |jjtksoJ d S )Ni      r7   )finetune_reciper   r   r9   r   r   r   r   r   r%   r   r:   r   r;   r<   r=   peftr   r>   r   r   r   test_finetune_recipeJ   s   z#TestLlama3_70B.test_finetune_recipeznum_nodes,num_gpus_per_node))r7   r    )r$   r!   )r!   r$   c                 C   s2   |j ||d}|jj|ksJ |jj|ksJ d S )N)r(   num_gpus_per_node)r8   r%   r(   r'   )r   r   r(   rE   r?   r   r   r   2test_pretrain_recipe_with_different_configurationsZ   s   zATestLlama3_70B.test_pretrain_recipe_with_different_configurationsc                 C   s*   |j dd}tdd |jjD sJ d S )NT)performance_modec                 s   s&    | ]}t |tjo|jtkV  qd S r   )r   r   r   r   r   ).0cbr   r   r   	<genexpr>b   s
    
zITestLlama3_70B.test_pretrain_performance_optimizations.<locals>.<genexpr>)r8   anyr%   	callbacksr>   r   r   r   'test_pretrain_performance_optimizations`   s   z6TestLlama3_70B.test_pretrain_performance_optimizationsc                 C   sV   |j ddddd}|jjdksJ |jjdksJ |jjdks!J |jjdu s)J d S )Nr    r$   r!   F)tensor_parallelismpipeline_parallelismcontext_parallelismsequence_parallelism)r%   r)   r+   r,   r1   r2   r3   r   r   r    test_trainer_parallelism_optionsg   s   z/TestLlama3_70B.test_trainer_parallelism_optionsc                 C   sJ   |  }|j}|jdksJ |jdksJ |jdksJ |jdks#J d S )NP   r6   @   )r   r   
num_layershidden_sizenum_attention_headsr;   )r   r   r   llama_configr   r   r   test_model_config_parametersp   s   z+TestLlama3_70B.test_model_config_parametersN)r*   
__module____qualname__pytestfixturer   r   r5   r@   rD   markparametrizerF   rM   rR   rY   r   r   r   r   r      s    


	r   )nemo_runr   r\   r.   nemo.collections.llm.apir   r   "nemo.collections.llm.gpt.data.mockr   #nemo.collections.llm.gpt.data.squadr   $nemo.collections.llm.gpt.model.llamar   r   nemo.collections.llm.peft.lorar   nemo.collections.llm.recipesr
   nemo.lightningr   r   6nemo.lightning.pytorch.callbacks.megatron_comm_overlapr   nemo.utils.exp_managerr   r   r   r   r   r   <module>   s   