o
    }oi                      @   s<   d dl mZmZmZmZ dd Zdd Zdd Zdd	 Zd
S )    )Gemma3Config1BGemma3Config4BGemma3Config12BGemma3Config27Bc                  C   s0  t  } | jdks
J | jdksJ | jdksJ | jdksJ | jdks&J | jdks-J | jdks4J | jdks;J | j	d	ksBJ | j
d
ksIJ | jdksPJ | jdu sWJ | jdks^J | jdu seJ | jdkslJ | jdu ssJ | jdkszJ | jdksJ | jdu sJ | jdu sJ | jdksJ d S )N   i           i   i   i'  i@B g      ?i   RMSNormTư>ropeF        i   )r   
num_layershidden_sizenum_attention_headsnum_query_groupskv_channelsffn_hidden_sizewindow_sizerotary_baserope_scaling_factor
seq_lengthnormalizationlayernorm_zero_centered_gammalayernorm_epsilongated_linear_unitposition_embedding_typeadd_bias_linearhidden_dropoutattention_dropout#share_embeddings_and_output_weightsis_vision_language
vocab_sizeconfig r&   _/home/ubuntu/.local/lib/python3.10/site-packages/tests/collections/llm/gpt/model/test_gemma3.pytest_gemma3_1b_config   ,   r(   c                  C   "  t  } | jdks
J | jdksJ | jdksJ | jdksJ | jdks&J | jdks-J | jdks4J | jdks;J | j	d	ksBJ | j
d
ksIJ | jdksPJ | jdu sWJ | jdks^J | jdu seJ | jdkslJ | jdu ssJ | jdkszJ | jdksJ | jdu sJ | jdu sJ d S )N"   i 
     r   r	   i (     r
          @   r   Tr   r   Fr   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r$   r&   r&   r'   test_gemma3_4b_config+   *   r0   c                  C   r*   )N0   i      r,   r	   i <  r-   r
   r.   r/   r   Tr   r   Fr   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r$   r&   r&   r'   test_gemma3_12b_configC   r1   r4   c                  C   s0  t  } | jdks
J | jdksJ | jdksJ | jdksJ | jdks&J | jdks-J | jdks4J | jdks;J | j	d	ksBJ | j
d
ksIJ | jdksPJ | jdksWJ | jdu s^J | jdkseJ | jdu slJ | jdkssJ | jdu szJ | jdksJ | jdksJ | jdu sJ | jdu sJ d S )N>   i       r3      gP6?i T  r-   r
   r.   r/   r   Tr   r   Fr   )r   r   r   r   r   r   softmax_scaler   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r$   r&   r&   r'   test_gemma3_27b_config[   r)   r9   N)	%nemo.collections.llm.gpt.model.gemma3r   r   r   r   r(   r0   r4   r9   r&   r&   r&   r'   <module>   s
   