o
    }oi                     @   s`   d dl mZ d dlmZmZmZmZmZmZ dd Z	dd Z
dd Zd	d
 Zdd Zdd ZdS )    )squared_relu)Nemotron3Config4BNemotron3Config8BNemotron3Config22BNemotron4Config15BNemotron4Config340BNemotronConfigc                  C   s"  t  } | jdks
J | jtksJ | jdksJ | jdu sJ | jdu s&J | jdks-J | jdks4J | j	dks;J | j
du sBJ | jdu sIJ | jdu sPJ | jdu sWJ | jdks^J | jdkseJ | jd	kslJ | jd
kssJ | jdkszJ | jdksJ | jdksJ | jdksJ d S )N	LayerNormropeFg        g      ?T           $           S!uq?)r   normalizationactivation_funcr   position_embedding_type#share_embeddings_and_output_weightsadd_bias_linearhidden_dropoutattention_dropoutrotary_percentmasked_softmax_fusionpersist_layer_normbias_dropout_add_fusionlayernorm_zero_centered_gamma
num_layers
seq_lengthhidden_sizeffn_hidden_sizenum_attention_headsnum_query_groupskv_channelsinit_method_stdconfig r)   a/home/ubuntu/.local/lib/python3.10/site-packages/tests/collections/llm/gpt/model/test_nemotron.pytest_nemotron_config   s*   r+   c                  C   z   t  } | jdks
J | jdksJ | jdksJ | jdksJ | jdks&J | jdks-J | jdks4J | jdks;J d S )	Nr   r   r   r   r   r   r   r   )	r   r   r    r!   r"   r#   r$   r%   r&   r'   r)   r)   r*   test_nemotron3_config_4b3      r-   c                  C   sz   t  } | jdks
J | jdksJ | jdksJ | jdksJ | jdks&J | jdks-J | jdks4J | jdks;J d S )Nr   r   i @  r   g{Gz?)	r   r   r    r!   r"   r#   r$   r%   r&   r'   r)   r)   r*   test_nemotron3_config_8b?   r.   r/   c                  C   sz   t  } | jdks
J | jdksJ | jdksJ | jdksJ | jdks&J | jdks-J | jdks4J | jdks;J d S )N(   r       `  0   r   gMb?)	r   r   r    r!   r"   r#   r$   r%   r&   r'   r)   r)   r*   test_nemotron3_config_22bK   r.   r4   c                  C   r,   )	Nr   r   r1   r2   r3   r   r   r   )	r   r   r    r!   r"   r#   r$   r%   r&   r'   r)   r)   r*   test_nemotron4_config_15bW   r.   r5   c                  C   sz   t  } | jdks
J | jdksJ | jdksJ | jdksJ | jdks&J | jdks-J | jdks4J | jdks;J d S )N`   r   i H  i   r      g o_y?)	r   r   r    r!   r"   r#   r$   r%   r&   r'   r)   r)   r*   test_nemotron4_config_340bc   r.   r8   N)"nemo.collections.llm.fn.activationr   'nemo.collections.llm.gpt.model.nemotronr   r   r   r   r   r   r+   r-   r/   r4   r5   r8   r)   r)   r)   r*   <module>   s    
