o
    }oi                  	   @   s   d dl Z d dlZd dlmZmZmZmZ dd Ze jdddd Z	d	d
 Z
dd Zdd Zdd Zdd Zdd Zdd Ze jdeddgddggedfeddgddggedfgdd ZdS )    N),BERTInBatchExclusiveHardNegativesRankingLossBERTLossReductionHardNegativeRankingLosssentence_order_prediction_lossc                 C   s&   t dd | D }| t| S )z>Mock function to average losses without distributed operationsc                 S   s   g | ]}|   qS  )clonedetach).0lossr   r   X/home/ubuntu/.local/lib/python3.10/site-packages/tests/collections/llm/bert/test_loss.py
<listcomp>   s    z'mock_average_losses.<locals>.<listcomp>)torchstackmeanrepeatlen)lossesaveraged_lossesr   r   r   mock_average_losses   s   r   T)autousec                 C   sJ   | j ddd | j ddd | j ddd | j dd d | j dtd	 d S )
N<megatron.core.parallel_state.get_context_parallel_world_size   return_valuez9megatron.core.parallel_state.get_data_parallel_world_sizez3megatron.core.parallel_state.get_data_parallel_rankr   z4megatron.core.parallel_state.get_data_parallel_groupzHnemo.collections.llm.bert.loss.average_losses_across_data_parallel_group)side_effect)patchr   )mockerr   r   r   mock_distributed   s   
r   c                  C   s   t dd} d}d}t|d |}i }| j||d\}}t|tjs$J | dks,J d|v s2J t|d | s>J d S )N   )num_hard_negatives      batchforward_outr   avg)	r   r   randnforward
isinstanceTensordimallcloser   loss_fn
batch_size	embed_dimr$   r#   r
   statsr   r   r   test_hard_negative_ranking_loss-   s   
r1   c                  C   s^   t dt dt dg} t| }t d}t ||ds"J |jt dgks-J dS )z>Test to verify the mocked averaging function works as expected      ?g       @g      @   N)r   tensorr   r+   r   shapeSize)r   averagedexpected_meanr   r   r   test_average_losses_mock>   s
   
r9   c                  C   s   t ddd} d}d}t|d |}i }| j||d\}}t|tjs%J | dks-J d	|v s3J t|d	 | s?J d S )
Nr   F)r   global_in_batch_negativesr    r!   r3   r"   r   r%   )	r   r   r&   r'   r(   r)   r*   r+   r   r,   r   r   r   !test_bert_in_batch_negatives_lossH   s   r;   c                  C   sJ   t dddd} | jdksJ | jdksJ | jdksJ t| dr#J dS )z:Test BERTLossReduction with SOP loss enabled (lines 32-38)FT)validation_stepval_drop_lastadd_sop_lossmlmN)r   r<   r=   r>   hasattrr-   r   r   r   test_bert_loss_with_sopY   s
   rB   c                 C   s   t dd}tddtddgd}tddtddtddd}| jd	dd
 |j||d\}}t|tjs=J t|t	sDJ d|v sJJ | jd	dd
 t
jtdd |j||d W d   dS 1 skw   Y  dS )z:Test BERTLossReduction forward with SOP loss (lines 50-71)T)r>   r   r    r   r   )	loss_mask	is_random)lm_lossrC   binary_logitsr   r   r"   r%   z$CP is not supported for SOP loss yet)matchN)r   r   onesr4   r&   r   r'   r(   r)   dictpytestraisesNotImplementedError)r   r-   r#   r$   r
   r0   r   r   r   test_bert_loss_forward_with_sopb   s   
$"rM   c               	   C   sn   t dddddddd} | jdksJ | jdksJ | jdks J | jdks'J | jdks.J | jdks5J dS )	zPTest BERTInBatchExclusiveHardNegativesRankingLoss initialization (lines 213-228)TFr   g      >@g?global)r<   r=   r   scalelabel_smoothingr:   backprop_typeN)r   r<   r=   r   rO   r:   rQ   rA   r   r   r   !test_bert_in_batch_negatives_initx   s   
rR   c                  C   sj   t dddd} d}d}t|d |}i }| j||d\}}t|tjs&J t|ts-J d|v s3J d	S )
zTTest BERTInBatchExclusiveHardNegativesRankingLoss forward validation (lines 278-298)r   T)r   r:   r<   r       r3   r"   r%   N)r   r   r&   r'   r(   r)   rI   r,   r   r   r   %test_bert_in_batch_forward_validation   s   rT   ztensor_input,expectedg      ?g?g?g        r2   c                 C   s<   t ddg}t| |}t|t jsJ | dksJ dS )z3Test sentence_order_prediction_loss (lines 307-314)r   r   N)r   r4   r   r(   r)   r*   )tensor_inputexpectedsentence_orderr
   r   r   r   test_sentence_order_prediction   s   	
rX   )rJ   r   nemo.collections.llm.bert.lossr   r   r   r   r   fixturer   r1   r9   r;   rB   rM   rR   rT   markparametrizer4   rX   r   r   r   r   <module>   s(   


	