o
    }oi
                     @   sN   d dl m  mZ d dlmZmZmZmZ dd Z	dd Z
dd Zd	d
 ZdS )    N)Starcoder2ConfigStarcoder2Config3BStarcoder2Config7BStarcoder2Config15Bc                  C   s  t dddd} | jdksJ | jtjksJ | jdu sJ | jdks$J | jdks+J | jd	ks2J | j	d	ks9J | j
d
ks@J | jdu sGJ | jdksNJ | jdksUJ | jdu s\J | jdu scJ | jdu sjJ | jdksqJ | jdksxJ | jd u sJ d S )N         )
num_layershidden_sizenum_attention_heads	LayerNormTi @  ropeg        g{Gz?F   gh㈵>g      ?)r   normalizationactivation_funcFgeluadd_bias_linear
seq_lengthposition_embedding_typehidden_dropoutattention_dropoutinit_method_std#share_embeddings_and_output_weightskv_channelsnum_query_groupsattention_softmax_in_fp32bias_activation_fusionbias_dropout_fusionlayernorm_epsilonrotary_percentwindow_sizeconfig r$   c/home/ubuntu/.local/lib/python3.10/site-packages/tests/collections/llm/gpt/model/test_starcoder2.pytest_starcoder2_config   s$   r&   c                  C   l   t  } | jdks
J | jdksJ | jdksJ | jdksJ | jdks&J | jdks-J | jdks4J d S )Nr   r   i 0  r      Vy?gR~.A)r   r	   r
   ffn_hidden_sizer   r   r   rotary_baser"   r$   r$   r%   test_starcoder2_config_3b.      r,   c                  C   r'   )N    i   i H  $      r)   i@B )r   r	   r
   r*   r   r   r   r+   r"   r$   r$   r%   test_starcoder2_config_7b9   r-   r1   c                  C   r'   )N(   i   i `  0   r0   g&1?i )r   r	   r
   r*   r   r   r   r+   r"   r$   r$   r%   test_starcoder2_config_15bD   r-   r4   )torch.nn.functionalnn
functionalr   )nemo.collections.llm.gpt.model.starcoder2r   r   r   r   r&   r,   r1   r4   r$   r$   r$   r%   <module>   s   