o
    }oi                     @   s~   d dl Zd dlZd dlmZmZ d dlmZ d dlm	Z	 d dl
mZmZ d dlmZ d dlmZ d dlmZ G d	d
 d
ZdS )    N)finetunepretrain)MockDataModule)SquadDataModule)DeepSeekModelDeepSeekV3Config)LoRAdeepseek_v3)Trainerc                   @   sH   e Zd Zejdddd Zdd Zdd Zd	d
 Zdd Z	dd Z
dS )TestDeepSeekV3class)scopec                 C   s   t S Nr	   )self r   b/home/ubuntu/.local/lib/python3.10/site-packages/tests/collections/llm/recipes/test_deepseek_v3.pyrecipe_module   s   zTestDeepSeekV3.recipe_modulec                 C   sL   |  }t|tjsJ |jtksJ t|jtjsJ |jjtks$J d S r   )model
isinstancerunConfig__fn_or_cls__r   configr   )r   r   model_configr   r   r   
test_model    s
   zTestDeepSeekV3.test_modelc                 C   s   |  }t|tjsJ |jtksJ t|jtjsJ |jjtks$J t|j	tjs-J |j	jt
ks5J t|jtjs>J |jjtksFJ |jjdksNJ |jjdksVJ |jjdks^J |j	jjdksgJ |j	jjdkspJ |j	jjdksyJ d S )Ni         @   )pretrain_reciper   r   Partialr   r   r   r   r   trainerr   datar   
seq_lengthglobal_batch_sizemicro_batch_sizestrategytensor_model_parallel_sizeexpert_tensor_parallel_sizeexpert_model_parallel_sizer   r   reciper   r   r   test_pretrain_recipe'   s   z#TestDeepSeekV3.test_pretrain_recipec                 C   sT  |  }t|tjsJ |jtksJ t|jtjsJ |jjtks$J t|j	tjs-J |j	jt
ks5J t|jtjs>J |jjtksFJ |jjdksNJ t|jtjsWJ |jjtks_J |jjjdkshJ |jjg dksrJ |j	jjdu s{J |j	jjdksJ |j	jjdksJ |j	jjdksJ |j	jjdksJ |j	jjd	ksJ d S )
Ni   g-C6?)linear_q_down_projlinear_q_up_projlinear_kv_down_projlinear_kv_up_projlinear_projT   r            )finetune_reciper   r   r    r   r   r   r   r   r!   r   r"   r   r#   peftr   optimr   lrtarget_modulesr&   sequence_parallelr'   r)   pipeline_model_parallel_size"num_layers_in_first_pipeline_stage!num_layers_in_last_pipeline_stager*   r   r   r   test_finetune_recipe:   s(   	z#TestDeepSeekV3.test_finetune_recipec                 C   s   |j d d}|jjjdu sJ |jjjdksJ |jjjdks!J |jjjdks*J |jjjdks3J |jjjdks<J |j	j
jdksEJ t|d	rQ|jd u sSJ d S d S )
Npeft_schemeFr   r   r2         gh㈵>r7   )r6   r!   r&   r;   r)   r'   r<   r=   r>   r8   r   r9   hasattrr7   r*   r   r   r   !test_finetune_recipe_without_peftZ   s    z0TestDeepSeekV3.test_finetune_recipe_without_peftc                 C   s@   t jtdd |jdd W d    d S 1 sw   Y  d S )Nz(Unrecognized peft scheme: invalid_scheme)matchinvalid_schemer@   )pytestraises
ValueErrorr6   )r   r   r   r   r   &test_finetune_recipe_with_invalid_pefte   s   "z5TestDeepSeekV3.test_finetune_recipe_with_invalid_peftN)__name__
__module____qualname__rH   fixturer   r   r,   r?   rE   rK   r   r   r   r   r      s    

 r   )nemo_runr   rH   nemo.collections.llm.apir   r   "nemo.collections.llm.gpt.data.mockr   #nemo.collections.llm.gpt.data.squadr   'nemo.collections.llm.gpt.model.deepseekr   r   nemo.collections.llm.peft.lorar   nemo.collections.llm.recipesr
   nemo.lightningr   r   r   r   r   r   <module>   s   