o
    }oi|!                     @   s   d dl Zd dlZd dlZd dlmZmZ d dlmZm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ G d
d dZdS )    N)Gemma3Config1BGemma3Model)finetunepretrain)MockDataModule)SquadDataModule)LoRA	gemma3_1b)Trainer)TimingCallbackc                   @   s   e Zd Zejdddd Zdd Zdd Zd	d
 Zdd Z	ej
dg ddd Zdd Zdd Zdd Zdd Zdd ZdS )TestGemma3_1Bclass)scopec                 C   s   t S Nr	   )self r   `/home/ubuntu/.local/lib/python3.10/site-packages/tests/collections/llm/recipes/test_gemma3_1b.pyrecipe_module   s   zTestGemma3_1B.recipe_modulec                 C   sx  |  }t|tjsJ |jtksJ t|jtjsJ |jjtks$J |j}|jdks.J |j	dks5J |j
dks<J |jdksCJ |jdksJJ |jdksQJ |jdksXJ |jdks_J |jd	ksfJ |jd
ksmJ |jdkstJ |jdu s{J |jdksJ |jdu sJ |jdksJ |jdu sJ |jdksJ |jdksJ |jdu sJ |jdu sJ |jdksJ d S )N   i           i      )i'  i@B g      ?    RMSNormTgư>ropeFg        i   )model
isinstancerunConfig__fn_or_cls__r   configr   
num_layershidden_sizenum_attention_headsnum_query_groupskv_channelsffn_hidden_sizewindow_sizerotary_baserope_scaling_factor
seq_lengthnormalizationlayernorm_zero_centered_gammalayernorm_epsilongated_linear_unitposition_embedding_typeadd_bias_linearhidden_dropoutattention_dropout#share_embeddings_and_output_weightsis_vision_language
vocab_size)r   r   model_configr"   r   r   r   
test_model"   s6   zTestGemma3_1B.test_modelc                 C   s  |  }t|tjsJ |jtksJ |jdksJ |jdks!J |jdks(J |j	dks/J t|j
tjs8J |j
jjdksAJ |j
jdksIJ |j
jdksQJ |j
jd u sYJ |j
jd u saJ |j
jdksiJ |j
jdu sqJ |j
jdu syJ |j
jdu sJ |j
jdu sJ |j
jjdu sJ |j
jjdu sJ |j
jjdu sJ |j
jjdu sJ |j
jjdu sJ |jdksJ |jdksJ |jd	ksJ |jdksJ |jdu sJ |j d
ksJ d S )Ngpu   r   
   MegatronStrategyFT2       i  )!gemma3_trainerr   r   r    r!   r   acceleratordevices	num_nodes	max_stepsstrategy__name__tensor_model_parallel_sizepipeline_model_parallel_sizepipeline_dtype$virtual_pipeline_model_parallel_sizecontext_parallel_sizesequence_parallelgradient_as_bucket_viewckpt_async_saveckpt_parallel_loadddpcheck_for_nan_in_gradgrad_reduce_in_fp32overlap_grad_reduceoverlap_param_gatheraverage_in_collectiveaccumulate_grad_batcheslimit_test_batcheslimit_val_batcheslog_every_n_stepsuse_distributed_samplerval_check_intervalr   r   trainer_configr   r   r   test_trainerA   s:   zTestGemma3_1B.test_trainerc                 C   s   |  }t|tjsJ |jtksJ t|jtjsJ |jjtks$J t|j	tjs-J |j	jt
ks5J t|jtjs>J |jjtksFJ |jjdksNJ |jjdksVJ |jjdks^J d S )Nr   r   r   )pretrain_reciper   r   Partialr!   r   r   r    r   trainerr   datar   r,   global_batch_sizemicro_batch_sizer   r   reciper   r   r   test_pretrain_recipef   s   z"TestGemma3_1B.test_pretrain_recipec                 C   s  |  }t|tjsJ |jtksJ t|jtjsJ |jjtks$J t|j	tjs-J |j	jt
ks5J t|jtjs>J |jjtksFJ |jjdksNJ t|jtjsWJ |jjtks_J |jjdksgJ |jjdksoJ |jjjdksxJ |jjjdu sJ d S )Ni   r;      g-C6?F)finetune_reciper   r   r`   r!   r   r   r    r   ra   r   rb   r   r,   peftr   dimalphaoptimr"   lruse_distributed_optimizerre   r   r   r   test_finetune_recipet   s    z"TestGemma3_1B.test_finetune_recipeznum_nodes,num_gpus_per_node))r   r;   )   r   )r   rq   c                 C   s2   |j ||d}|jj|ksJ |jj|ksJ d S )N)rC   num_gpus_per_node)r_   ra   rC   rB   )r   r   rC   rr   rf   r   r   r   2test_pretrain_recipe_with_different_configurations   s   z@TestGemma3_1B.test_pretrain_recipe_with_different_configurationsc                 C   s&   |  }tdd |jjD sJ d S )Nc                 s   s&    | ]}t |tjo|jtkV  qd S r   )r   r   r    r!   r   ).0cbr   r   r   	<genexpr>   s    
zHTestGemma3_1B.test_pretrain_performance_optimizations.<locals>.<genexpr>)r_   anyra   	callbacksre   r   r   r   'test_pretrain_performance_optimizations   s   z5TestGemma3_1B.test_pretrain_performance_optimizationsc                 C   s~   |j ddtjdddd}|jjdksJ |jjdksJ |jjtjks%J |jjdks-J |jjdks5J |jj	du s=J d S )Nrq   r   T)tensor_parallelismpipeline_parallelismpipeline_parallelism_typevirtual_pipeline_parallelismcontext_parallelismsequence_parallelism)
r@   torchbfloat16rE   rG   rH   rI   rJ   rK   rL   r\   r   r   r    test_trainer_parallelism_options   s   z.TestGemma3_1B.test_trainer_parallelism_optionsc                 C   sL   |j d d}|jjjdksJ t|dr|jd u sJ |jjjdks$J d S )Npeft_schemegh㈵>rj   r   )	ri   rm   r"   rn   hasattrrj   ra   rE   rG   re   r   r   r   !test_finetune_recipe_without_peft   s   z/TestGemma3_1B.test_finetune_recipe_without_peftc                 C   s@   t jtdd |jdd W d    d S 1 sw   Y  d S )Nz(Unrecognized peft scheme: invalid_scheme)matchinvalid_schemer   )pytestraises
ValueErrorri   )r   r   r   r   r   &test_finetune_recipe_with_invalid_peft   s   "z4TestGemma3_1B.test_finetune_recipe_with_invalid_peftc                 C   sV   |j dd}|jjdksJ |jjddiksJ |jjd us J |jjjdks)J d S )NT)packed_sequencei   pad_to_max_length)ri   rb   r,   dataset_kwargspacked_sequence_specspacked_sequence_sizere   r   r   r   )test_finetune_recipe_with_packed_sequence   s
   z7TestGemma3_1B.test_finetune_recipe_with_packed_sequenceN)rF   
__module____qualname__r   fixturer   r9   r^   rg   rp   markparametrizers   ry   r   r   r   r   r   r   r   r   r      s    

%
r   )nemo_runr   r   r   nemo.collections.llmr   r   nemo.collections.llm.apir   r   "nemo.collections.llm.gpt.data.mockr   #nemo.collections.llm.gpt.data.squadr   nemo.collections.llm.peft.lorar   nemo.collections.llm.recipesr
   nemo.lightningr   nemo.utils.exp_managerr   r   r   r   r   r   <module>   s   