o
    }oi.                     @   s   d dl Zd dlZd dlZd dlmZmZ d dlmZ d dl	m
Z
 d dlmZmZ d dlmZ d dlmZ d dlmZ d d	lmZ G d
d dZdS )    N)finetunepretrain)MockDataModule)SquadDataModule)Llama31Config405B
LlamaModel)LoRAllama31_405b)Trainer)GarbageCollectionCallbackc                   @   s`   e Zd Zejdddd Zdd Zdd Zd	d
 Zdd Z	dd Z
dd Zdd Zdd ZdS )TestLlama31_405Bclass)scopec                 C   s   t S Nr	   )self r   c/home/ubuntu/.local/lib/python3.10/site-packages/tests/collections/llm/recipes/test_llama31_405b.pyrecipe_module   s   zTestLlama31_405B.recipe_modulec                 C   s\   |  }t|tjsJ |jtksJ t|jtjsJ |jjtks$J |jjdks,J d S )N    )	model
isinstancerunConfig__fn_or_cls__r   configr   
seq_length)r   r   model_configr   r   r   
test_model"   s   zTestLlama31_405B.test_modelc                 C   s  |  }t|tjsJ |jtksJ |jdksJ |jdks!J |jdks(J |j	dks/J t|j
tjs8J |j
jjdksAJ |j
jdksIJ |j
jdksQJ |j
jtjksZJ |j
jdksbJ |j
jdksjJ |j
jdu srJ |j
jdu szJ |j
jdu sJ d S )	Ngpu   @   i{ MegatronStrategy      T)trainerr   r   r   r   r   acceleratordevices	num_nodes	max_stepsstrategy__name__tensor_model_parallel_sizepipeline_model_parallel_sizepipeline_dtypetorchbfloat16$virtual_pipeline_model_parallel_sizecontext_parallel_sizesequence_parallel'account_for_embedding_in_pipeline_split"account_for_loss_in_pipeline_split)r   r   trainer_configr   r   r   test_trainer*   s"   zTestLlama31_405B.test_trainerc                 C   s   |  }t|tjsJ |jtksJ t|jtjsJ |jjtks$J t|j	tjs-J |j	jt
ks5J t|jtjs>J |jjtksFJ |jjdksNJ |jjdksVJ |jjdks^J d S )Nr   i      )pretrain_reciper   r   Partialr   r   r   r   r   r%   r   datar   r   global_batch_sizemicro_batch_sizer   r   reciper   r   r   test_pretrain_recipe?   s   z%TestLlama31_405B.test_pretrain_recipec                 C   s(  |  }t|tjsJ |jtksJ t|jtjsJ |jjtks$J t|j	tjs-J |j	jt
ks5J t|jtjs>J |jjtksFJ |jjdksNJ |jjdksVJ t|jtjs_J |jjtksgJ |jjdksoJ |jjdkswJ |jjjdksJ |j	jjdu sJ |j	jjdu sJ d S )Ni             g-C6?T)finetune_reciper   r   r:   r   r   r   r   r   r%   r   r;   r   r   r<   peftr   dimalphaoptimr   lrr*   r4   r5   r>   r   r   r   test_finetune_recipeM   s$   z%TestLlama31_405B.test_finetune_recipec                 C   sr   |j d d}|jjjdksJ |jjjdksJ |jjdks J |jjj	dks)J t
|dr5|jd u s7J d S d S )N)peft_schemer       rA   gh㈵>rE   )rD   r%   r*   r,   r-   r;   r<   rH   r   rI   hasattrrE   r>   r   r   r   !test_finetune_recipe_without_peftc   s    z2TestLlama31_405B.test_finetune_recipe_without_peftc                 C   s   | | }|jjjdu sJ tdd |jjD sJ tdd |jjD }|jdu s/J |j	du s6J |j
dks=J |jdu sDJ d S )NFc                 s   s&    | ]}t |tjo|jtkV  qd S r   )r   r   r   r   r   .0cbr   r   r   	<genexpr>p   s
    
zKTestLlama31_405B.test_pretrain_performance_optimizations.<locals>.<genexpr>c                 s   s,    | ]}t |tjr|jjd kr|V  qdS )MegatronCommOverlapCallbackN)r   r   r   r   r+   rO   r   r   r   rR   t   s    
T2   )"pretrain_performance_optimizationsr9   r%   pluginsgrad_reduce_in_fp32any	callbacksnexttp_comm_overlapdefer_embedding_wgrad_computewgrad_deferral_limit(overlap_param_gather_with_optimizer_step)r   r   r?   comm_overlap_cbr   r   r   'test_pretrain_performance_optimizationsk   s   z8TestLlama31_405B.test_pretrain_performance_optimizationsc                 C   s   |j dd d}|jjjdksJ |jjjdksJ |jjjdu s"J |jjjdu s+J |jjjjdu s5J |jjjj	du s?J |jjjj
du sIJ |jjjjdu sSJ d S )NTperformance_moderK   r    rL   F)rD   r%   r*   r,   r-   r3   rV   rW   ddpoverlap_grad_reduceoverlap_param_gatheraverage_in_collectiver>   r   r   r   'test_finetune_performance_optimizations~   s   z8TestLlama31_405B.test_finetune_performance_optimizationsc                 C   sl   |j ddd}|jjdgksJ |jjjdksJ |jjjdks"J |jjjdks+J |jjjdu s4J d S )NTlorara   
linear_qkvr$   )	rD   rE   target_modulesr%   r*   r,   r-   r1   r3   r>   r   r   r   1test_finetune_performance_optimizations_with_peft   s   zBTestLlama31_405B.test_finetune_performance_optimizations_with_peftN)r+   
__module____qualname__pytestfixturer   r   r7   r@   rJ   rN   r`   rg   rk   r   r   r   r   r      s    

r   )nemo_runr   rn   r/   nemo.collections.llm.apir   r   "nemo.collections.llm.gpt.data.mockr   #nemo.collections.llm.gpt.data.squadr   $nemo.collections.llm.gpt.model.llamar   r   nemo.collections.llm.peft.lorar   nemo.collections.llm.recipesr
   nemo.lightningr   3nemo.lightning.pytorch.callbacks.garbage_collectionr   r   r   r   r   r   <module>   s   