o
    }oi                     @   s   d dl mZmZ d dlZd dlmZ d dlmZmZm	Z	m
Z
mZ dd Zdd Zd	d
 Zededededdd Zdd ZdS )    )	MagicMockpatchN)get_nv_embedding_layer_spec)Llama32Reranker1BConfigLlama32Reranker500MConfigReRankerBaseConfigReRankerLossReRankerModelc                  C   s   t  } | jdks
J | jdksJ | jdksJ | jdksJ | jdu s&J | jdks-J | jdu s4J | jdu s;J | j	dksBJ | j
d	ksIJ d S )
Nright   2           FfirstTavg      ?)r   truncation_methodnum_hard_negativesce_loss_scalelabel_smoothingin_batch_negativesnegative_sample_strategyadd_bosadd_eos	pool_typetemperatureconfig r   a/home/ubuntu/.local/lib/python3.10/site-packages/tests/collections/llm/gpt/model/test_reranker.pytest_reranker_base_config   s   r   c                  C   s
  t  } | jdks
J | jdksJ | jdksJ | jdksJ | jdks&J | jdks-J | jdks4J | jdks;J | j	d	ksBJ | j
d
u sIJ | jdksPJ | jdu sWJ | jd
u s^J | jdkseJ | jdkslJ | jtkssJ | jjdks{J | jjdksJ d S )N                 r
   r   r   r   Fr   Tr   r   reranker_forward_stepreranker_data_step)r   
num_layershidden_sizenum_attention_headsnum_query_groupsffn_hidden_sizer   r   r   r   r   r   r   r   r   r   transformer_layer_specr   forward_step_fn__name__data_step_fnr   r   r   r   test_llama32_reranker_1b_config+   s&   r0   c                  C   s   t  } | jdks
J | jdksJ | jdksJ | jdksJ | jdks&J | jdks-J | jdks4J | jdks;J | j	d	u sBJ | j
d
ksIJ | jdu sPJ | jd	u sWJ | jdks^J | jdkseJ d S )Nr!   r"   r#   r$   r
   r   r   r   Fr   Tr   r   )r   r(   r)   r*   r+   r   r   r   r   r   r   r   r   r   r   r   r   r   r   !test_llama32_reranker_500m_configF   s   r1   ztorch.distributed.all_reducez torch.distributed.get_world_sizez4megatron.core.parallel_state.get_data_parallel_groupz<megatron.core.parallel_state.get_context_parallel_world_sizec                 C   s   d| _ t |_ d|_ d |_ tddddd}|jdu sJ |jdu s#J |jdks*J |jjdks2J d}d}d| }tj	|| dd	}i }	|
|	|\}
}t|
tjsUJ t|ts\J d
|v sbJ |d
 jdkskJ |   |  |  |  d S )N   FT   g?)validation_stepval_drop_lastr   r   r   cpudevicer   )r2   )return_valuer   r   r4   r5   r   cross_entropy_lossr   torchrandnforward
isinstanceTensordictshapeassert_called_onceassert_called)mock_cp_sizemock_dp_groupmock_world_sizemock_all_reduceloss_fn
batch_sizer   num_tensors_per_exampleforward_outbatchlossmetricsr   r   r   test_reranker_loss[   s.   rO   c                  C   s   t  } t| }d}d}| j}tj|||dd}tj||dd}d| _|||}|j||fks1J d| _|||}|j||fksCJ d| _|||}|j||fksUJ d| _|||}|j||fksgJ d S )	Nr3   
   r6   r7   r   weighted_avgclslast)	r   r	   r(   r;   r<   onesr   poolrA   )r   modelrI   
seq_lengthr(   last_hidden_statesattention_maskpooledr   r   r   test_reranker_model_pooling   s&   r[   )unittest.mockr   r   r;   .nemo.collections.llm.gpt.model.llama_embeddingr   'nemo.collections.llm.gpt.model.rerankerr   r   r   r   r	   r   r0   r1   rO   r[   r   r   r   r   <module>   s   	%