o
    }oi                     @   s   d dl Zd dlZd dlZd dlmZ d dlmZmZ d dl	m
Z
 d dlmZ d dlmZmZ d dlmZ d dlmZ d d	lmZ G d
d dZdS )    N)DistributedDataParallelConfig)finetunepretrain)MockDataModule)SquadDataModule)MixtralConfig8x22BMixtralModel)LoRAmixtral_8x22b)Trainerc                   @   sj   e Zd Zejdddd Zdd Zdd Zd	d
 Zdd Z	ej
dg ddd Zdd Zdd ZdS )TestMixtral8x22Bclass)scopec                 C   s   t S Nr
   )self r   d/home/ubuntu/.local/lib/python3.10/site-packages/tests/collections/llm/recipes/test_mixtral_8x22b.pyrecipe_module   s   zTestMixtral8x22B.recipe_modulec                 C   sL   |  }t|tjsJ |jtksJ t|jtjsJ |jjtks$J d S r   )model
isinstancerunConfig__fn_or_cls__r   configr   )r   r   model_configr   r   r   
test_model"   s
   zTestMixtral8x22B.test_modelc                 C   s4  |  }t|tjsJ |jtksJ |jdksJ |jdks!J |jdks(J t|j	tjs1J |j	jj
dks:J |j	jdksBJ |j	jdksJJ |j	jtjksSJ |j	jdks[J |j	jdkscJ |j	jdu skJ |j	jdkssJ t|j	jtjs}J |j	jjtksJ |j	jjdu sJ |j	jjdu sJ d S )	Ngpu      MegatronStrategy         T)trainerr   r   r   r   r   acceleratordevices	num_nodesstrategy__name__tensor_model_parallel_sizepipeline_model_parallel_sizepipeline_dtypetorchbfloat16$virtual_pipeline_model_parallel_sizecontext_parallel_sizesequence_parallelexpert_model_parallel_sizeddpr   check_for_nan_in_gradgrad_reduce_in_fp32r   r   trainer_configr   r   r   test_trainer)   s&   zTestMixtral8x22B.test_trainerc                 C   s   |  }t|tjsJ |jtksJ t|jtjsJ |jjtks$J t|j	tjs-J |j	jt
ks5J t|jtjs>J |jjtksFJ t|jjtjsPJ |jjdksXJ |jjdks`J |jjdkshJ d S )N   i      )pretrain_reciper   r   Partialr   r   r   r   r   r$   r   datar   r   
seq_lengthglobal_batch_sizemicro_batch_sizer   r   reciper   r   r   test_pretrain_recipeB   s   z%TestMixtral8x22B.test_pretrain_recipec                 C   s  |  }t|tjsJ |jtksJ t|jtjsJ |jjtks$J t|j	tjs-J |j	jt
ks5J t|jtjs>J |jjtksFJ |jjdksNJ |jjdksVJ |jjdks^J t|jtjsgJ |jjtksoJ |jjddgksyJ |jjdksJ d S )Ni      r:   
linear_qkvlinear_proj    )finetune_reciper   r   r<   r   r   r   r   r   r$   r   r=   r   r>   r?   r@   peftr	   target_modulesdimrA   r   r   r   test_finetune_recipeQ   s    z%TestMixtral8x22B.test_finetune_recipeznum_nodes,num_gpus_per_node))r   r   )r   r"   )rG   r!   c                 C   s2   |j ||d}|jj|ksJ |jj|ksJ d S )N)r'   num_gpus_per_node)r;   r$   r'   r&   )r   r   r'   rM   rB   r   r   r   2test_pretrain_recipe_with_different_configurationsc   s   zCTestMixtral8x22B.test_pretrain_recipe_with_different_configurationsc                 C   sh   |j dddddd}|jjdksJ |jjdksJ |jjdks"J |jjdu s*J |jjdks2J d S )Nr"   r!   F)tensor_parallelismpipeline_parallelismcontext_parallelismsequence_parallelismexpert_parallelism)r$   r(   r*   r+   r0   r1   r2   r6   r   r   r    test_trainer_parallelism_optionsi   s   z1TestMixtral8x22B.test_trainer_parallelism_optionsc                 C   sh   |  }|j}t|tjsJ |jdksJ |jdksJ |jdks$J |jdks+J |j	dks2J d S )N8   i   0   r9   r   )
r   r   r   r   r   
num_layershidden_sizenum_attention_headsr>   num_moe_experts)r   r   r   mixtral_configr   r   r   test_model_config_parametersw   s   z-TestMixtral8x22B.test_model_config_parametersN)r)   
__module____qualname__pytestfixturer   r   r8   rC   rL   markparametrizerN   rT   r\   r   r   r   r   r      s    


r   )nemo_runr   r_   r-   megatron.core.distributedr   nemo.collections.llm.apir   r   "nemo.collections.llm.gpt.data.mockr   #nemo.collections.llm.gpt.data.squadr   &nemo.collections.llm.gpt.model.mixtralr   r   nemo.collections.llm.peft.lorar	   nemo.collections.llm.recipesr   nemo.lightningr   r   r   r   r   r   <module>   s   