o
    }oi                      @   s   d dl Zd dlZd dlmZmZ d dlmZ d dlm	Z	 d dl
mZmZ d dlmZ d dlmZ d dlmZ d d	lmZ G d
d dZdS )    N)finetunepretrain)MockDataModule)SquadDataModule)Llama4Experts128Config
LlamaModel)DoRA)LoRAllama4_e128)Trainerc                   @   s   e Zd Zejdddd Zdd Zdd Zd	d
 Zdd Z	ej
dg ddd Zdd Zdd Zdd Zdd Zdd ZdS )TestLlama4_E128class)scopec                 C   s   t S Nr
   )self r   b/home/ubuntu/.local/lib/python3.10/site-packages/tests/collections/llm/recipes/test_llama4_e128.pyrecipe_module   s   zTestLlama4_E128.recipe_modulec                 C   sL   |  }t|tjsJ |jtksJ t|jtjsJ |jjtks$J d S r   )model
isinstancerunConfig__fn_or_cls__r   configr   )r   r   model_configr   r   r   
test_model!   s
   zTestLlama4_E128.test_modelc                 C   s  |  }t|tjsJ |jtksJ |jdksJ |jdks!J |jdks(J |j	dks/J t|j
tjs8J |j
jjdksAJ |j
jdksIJ |j
jdksQJ |j
jd u sYJ |j
jd u saJ |j
jdksiJ |j
jdu sqJ |j
jdksyJ |j
jd	ksJ |j
jdu sJ |j
jdu sJ |j
jdu sJ |jdksJ |jd
ksJ |jdksJ |jdksJ |jdu sJ |jdksJ t|jtjsJ |jjjdksJ d S )Ngpu   @   i{ MegatronStrategy      T   2       
   Fi  MegatronMixedPrecision)trainerr   r   r   r   r   acceleratordevices	num_nodes	max_stepsstrategy__name__tensor_model_parallel_sizepipeline_model_parallel_sizepipeline_dtype$virtual_pipeline_model_parallel_sizecontext_parallel_sizesequence_parallelexpert_tensor_parallel_sizeexpert_model_parallel_sizegradient_as_bucket_viewckpt_async_saveckpt_parallel_loadaccumulate_grad_batcheslimit_test_batcheslimit_val_batcheslog_every_n_stepsuse_distributed_samplerval_check_intervalpluginsr   r   trainer_configr   r   r   test_trainer(   s8   zTestLlama4_E128.test_trainerc                 C   s   |  }t|tjsJ |jtksJ t|jtjsJ |jjtks$J t|j	tjs-J |j	jt
ks5J t|jtjs>J |jjtksFJ |jjdksNJ |jjdksVJ d S )Ni    i   )pretrain_reciper   r   Partialr   r   r   r   r   r(   r   datar   
seq_lengthglobal_batch_sizer   r   reciper   r   r   test_pretrain_recipeL   s   z$TestLlama4_E128.test_pretrain_recipec                 C   s   |  }t|tjsJ |jtksJ t|jtjsJ |jjtks$J t|j	tjs-J |j	jt
ks5J t|jtjs>J |jjtksFJ |jjdksNJ |jjdksVJ t|jtjs_J |jjtksgJ d S )N   r#   )finetune_reciper   r   rE   r   r   r   r   r   r(   r   rF   r   rG   rH   peftr	   rI   r   r   r   test_finetune_recipeY   s   z$TestLlama4_E128.test_finetune_recipeznum_nodes,num_gpus_per_node))r"   r   )   r!   )r!   rP   c                 C   s2   |j ||d}|jj|ksJ |jj|ksJ d S )N)r+   num_gpus_per_node)rD   r(   r+   r*   )r   r   r+   rQ   rJ   r   r   r   2test_pretrain_recipe_with_different_configurationsh   s   zBTestLlama4_E128.test_pretrain_recipe_with_different_configurationsc                 C   sp   |j dd}tdd |jjD sJ tdd |jjD s J tdd |jjD s-J |jjjdu s6J d S )NTperformance_modec                 s       | ]	}|j jd kV  qdS MegatronCommOverlapCallbackNr   r.   .0cbr   r   r   	<genexpr>p       zJTestLlama4_E128.test_pretrain_performance_optimizations.<locals>.<genexpr>c                 s   rU   GarbageCollectionCallbackNrX   rY   r   r   r   r\   q   r]   c                 s   rU   )MegatronTokenDropCallbackNrX   rY   r   r   r   r\   r   r]   F)rD   anyr(   	callbacksr@   grad_reduce_in_fp32rI   r   r   r   'test_pretrain_performance_optimizationsn   s
   z7TestLlama4_E128.test_pretrain_performance_optimizationsc                 C   sz   |j ddddddd}|jjdksJ |jjdksJ |jjdks#J |jjdu s+J |jjdks3J |jjdks;J d S )NrP   r!   Tr   )tensor_parallelismpipeline_parallelismcontext_parallelismsequence_parallelismexpert_tensor_parallelismexpert_model_parallelism)r(   r-   r/   r0   r3   r4   r5   r6   rA   r   r   r    test_trainer_parallelism_optionsu   s   z0TestLlama4_E128.test_trainer_parallelism_optionsc                 C   sh   |j dd}|jjjdksJ tdd |jjD sJ tdd |jjD s)J |jjjdu s2J d S )NTrS   r"   c                 s   rU   rV   rX   rY   r   r   r   r\      r]   zJTestLlama4_E128.test_finetune_performance_optimizations.<locals>.<genexpr>c                 s   rU   r^   rX   rY   r   r   r   r\      r]   F)rM   r(   r-   r/   ra   rb   r@   rc   rI   r   r   r   'test_finetune_performance_optimizations   s
   z7TestLlama4_E128.test_finetune_performance_optimizationsc                 C   s   |j dd}|jjtksJ |jjdksJ |jjdksJ |jjjdks'J |j dd}|jjt	ks5J |jjdks=J |jjdksEJ |jjjdksNJ |j d d}|jjjdks]J t
t |j dd W d    d S 1 stw   Y  d S )	Nlora)peft_schemer      g-C6?doragh㈵>invalid)rM   rN   r   r	   dimalphaoptimr   lrr   pytestraises
ValueErrorrI   r   r   r   test_finetune_peft_options   s   "z*TestLlama4_E128.test_finetune_peft_optionsc                 C   s`   |j dd}|jjdksJ |jjd usJ |jjd du s J |j dd}|jjdks.J d S )NT)packed_sequencei   pad_to_max_lengthFrL   )rM   rF   rG   packed_sequence_specsdataset_kwargsrI   r   r   r   test_packed_sequence_options   s   z,TestLlama4_E128.test_packed_sequence_optionsN)r.   
__module____qualname__rv   fixturer   r   rC   rK   rO   markparametrizerR   rd   rk   rl   ry   r~   r   r   r   r   r      s    

$
r   )nemo_runr   rv   nemo.collections.llm.apir   r   "nemo.collections.llm.gpt.data.mockr   #nemo.collections.llm.gpt.data.squadr   $nemo.collections.llm.gpt.model.llamar   r   nemo.collections.llm.peftr   nemo.collections.llm.peft.lorar	   nemo.collections.llm.recipesr   nemo.lightningr   r   r   r   r   r   <module>   s   