o
    }oi                     @   s   d dl Zd dlZd dlZd dlmZmZ d dlmZ d dl	m
Z
 d dlmZmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ G dd dZdS )    N)finetunepretrain)MockDataModule)SquadDataModule)Llama31Config70B
LlamaModel)LoRAllama31_70b)Trainer)GarbageCollectionCallback)TimingCallbackc                   @   sp   e Zd Zejdddd Zdd Zdd Zd	d
 Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd ZdS )TestLlama31_70Bclass)scopec                 C   s   t S Nr	   )self r   b/home/ubuntu/.local/lib/python3.10/site-packages/tests/collections/llm/recipes/test_llama31_70b.pyrecipe_module   s   zTestLlama31_70B.recipe_modulec                 C   s\   |  }t|tjsJ |jtksJ t|jtjsJ |jjtks$J |jjdks,J d S )N    )	model
isinstancerunConfig__fn_or_cls__r   configr   
seq_length)r   r   model_configr   r   r   
test_model#   s   zTestLlama31_70B.test_modelc                 C   s   |  }t|tjsJ |jtksJ |jdksJ |jdks!J |jdks(J |j	dks/J t|j
tjs8J |j
jjdksAJ |j
jdksIJ |j
jdksQJ |j
jtjksZJ |j
jdksbJ |j
jdksjJ |j
jdu srJ d S )	Ngpu      i{ MegatronStrategy      T)trainerr   r   r   r   r   acceleratordevices	num_nodes	max_stepsstrategy__name__tensor_model_parallel_sizepipeline_model_parallel_sizepipeline_dtypetorchbfloat16$virtual_pipeline_model_parallel_sizecontext_parallel_sizesequence_parallel)r   r   trainer_configr   r   r   test_trainer+   s   zTestLlama31_70B.test_trainerc                 C   s   |  }t|tjsJ |jtksJ t|jtjsJ |jjtks$J t|j	tjs-J |j	jt
ks5J t|jtjs>J |jjtksFJ |jjdksNJ |jjdksVJ |jjdks^J d S )Nr   i      )pretrain_reciper   r   Partialr   r   r   r   r   r&   r   datar   r   global_batch_sizemicro_batch_sizer   r   reciper   r   r   test_pretrain_recipe>   s   z$TestLlama31_70B.test_pretrain_recipec                 C   s   |  }t|tjsJ |jtksJ t|jtjsJ |jjtks$J t|j	tjs-J |j	jt
ks5J t|jtjs>J |jjtksFJ |jjdksNJ t|jtjsWJ |jjtks_J |jjdksgJ |jjdksoJ |jjjdksxJ d S )Ni          g-C6?)finetune_reciper   r   r9   r   r   r   r   r   r&   r   r:   r   r   peftr   dimalphaoptimr   lrr=   r   r   r   test_finetune_recipeL   s   z$TestLlama31_70B.test_finetune_recipec                 C   sV   |j dd}|jjdksJ |jjddiksJ t|jds J |jjjdks)J d S )NT)packed_sequencei   pad_to_max_lengthpacked_sequence_specs)rB   r:   r   dataset_kwargshasattrrK   packed_sequence_sizer=   r   r   r   )test_finetune_recipe_with_packed_sequence]   s
   z9TestLlama31_70B.test_finetune_recipe_with_packed_sequencec                 C   s   | | }tdd |jjD sJ tdd |jjD }|jdu s&J |jdu s-J |jdks4J |j	du s;J |j
du sBJ d S )Nc                 s   s(    | ]}t |tjo|jjd kV  qdS MegatronCommOverlapCallbackNr   r   r   r   r,   .0cbr   r   r   	<genexpr>f   s
    
zJTestLlama31_70B.test_pretrain_performance_optimizations.<locals>.<genexpr>c                 s   s,    | ]}t |tjr|jjd kr|V  qdS rP   rR   rS   r   r   r   rV   k   s    
T2   F)"pretrain_performance_optimizationsr8   anyr&   	callbacksnexttp_comm_overlapdefer_embedding_wgrad_computewgrad_deferral_limit(overlap_param_gather_with_optimizer_stepalign_param_gather)r   r   r>   comm_overlap_cbr   r   r   'test_pretrain_performance_optimizationsd   s   z7TestLlama31_70B.test_pretrain_performance_optimizationsc                 C   sb   |j d d}|jjjdksJ |jjjdksJ |jjjdks!J t|dr-|j	d u s/J d S d S )Npeft_schemer!   r"   gh㈵>rC   )
rB   r&   r+   r-   r.   rF   r   rG   rM   rC   r=   r   r   r   !test_finetune_recipe_without_peftv   s
    z1TestLlama31_70B.test_finetune_recipe_without_peftc                 C   s@   t jtdd |jdd W d    d S 1 sw   Y  d S )Nz(Unrecognized peft scheme: invalid_scheme)matchinvalid_schemerc   )pytestraises
ValueErrorrB   )r   r   r   r   r   &test_finetune_recipe_with_invalid_peft}   s   "z6TestLlama31_70B.test_finetune_recipe_with_invalid_peftc                 C   s   |j dd d}|jjjdksJ |jjjdksJ |jjjdks"J |jjjdu s+J tdd |jjD s8J tdd |jjD sEJ d S )NTperformance_moderd   r"   r$   c                 s   &    | ]}t |tjo|jtkV  qd S r   )r   r   r   r   r   rS   r   r   r   rV      s    
zJTestLlama31_70B.test_finetune_performance_optimizations.<locals>.<genexpr>c                 s   rn   r   )r   r   r   r   r   rS   r   r   r   rV      s
    
)	rB   r&   r+   r-   r.   r2   r4   rY   rZ   r=   r   r   r   'test_finetune_performance_optimizations   s   z7TestLlama31_70B.test_finetune_performance_optimizationsc                 C   sZ   |j ddd}|jjdgksJ |jjjdksJ |jjjdks"J |jjjdks+J d S )NTlorarl   
linear_qkvr%   r"   r$   )rB   rC   target_modulesr&   r+   r-   r.   r2   r=   r   r   r   1test_finetune_performance_optimizations_with_peft   s
   zATestLlama31_70B.test_finetune_performance_optimizations_with_peftN)r,   
__module____qualname__rh   fixturer   r   r6   r?   rH   rO   rb   re   rk   ro   rs   r   r   r   r   r      s    

r   )nemo_runr   rh   r0   nemo.collections.llm.apir   r   "nemo.collections.llm.gpt.data.mockr   #nemo.collections.llm.gpt.data.squadr   $nemo.collections.llm.gpt.model.llamar   r   nemo.collections.llm.peft.lorar   nemo.collections.llm.recipesr
   nemo.lightningr   3nemo.lightning.pytorch.callbacks.garbage_collectionr   nemo.utils.exp_managerr   r   r   r   r   r   <module>   s   