o
    }oi(                     @   s   d dl mZmZmZmZmZmZmZmZm	Z	m
Z
mZmZmZmZ dd Zdd Zdd Zdd	 Zd
d Zdd Zdd Zdd Zdd Zdd Zdd Zdd ZdS )    )BaseMambaConfig1_3BBaseMambaConfig2_7BBaseMambaConfig130MBaseMambaConfig370MBaseMambaConfig780MNemotronHConfig4BNemotronHConfig8BNemotronHConfig47BNemotronHConfig56BNVIDIAMambaConfig8BNVIDIAMambaHybridConfig8B	SSMConfiggpt_data_stepssm_forward_stepc                  C   sv  t  } | jdu s
J | jdu sJ | jdu sJ | jdksJ | jdks&J | jdks-J | jdks4J | jd u s;J | j	du sBJ | j
du sIJ | jdksPJ | jdksWJ | jdks^J | jd	kseJ | jd u slJ | jdu ssJ | jd
kszJ | jdu sJ | jdksJ | jdu sJ | jdksJ | jdksJ | jdksJ | jdu sJ | jtksJ | jtksJ d S )NFT      g            noneg      ?i'     RMSNormgh㈵>)r   fp16_lm_cross_entropyparallel_output#share_embeddings_and_output_weights
num_layersnum_attention_headshybrid_attention_ratiohybrid_mlp_ratiohybrid_override_patternpost_processpre_process
seq_lengthposition_embedding_typerotary_percentrotary_baseseq_len_interpolation_factorapply_rope_fusionmake_vocab_size_divisible_bygated_linear_unitnormalizationadd_bias_linearhidden_dropoutattention_dropoutlayernorm_epsilonget_attention_mask_from_fusionforward_step_fnr   data_step_fnr   config r2   \/home/ubuntu/.local/lib/python3.10/site-packages/tests/collections/llm/gpt/model/test_ssm.pytest_ssm_config!   s6   r4   c                  C      t  } | jdks
J | jdksJ | jdksJ | jdksJ | jdks&J | jdks-J | jdks4J | jdks;J | j	dksBJ d S )	NMMMMMMMMMMMMMMMMMMMMMMMM      i      huggingfaceEleutherAI/gpt-neox-20bbase)
r   r   r   r    hidden_sizeffn_hidden_sizer&   tokenizer_librarytokenizer_namemapping_typer0   r2   r2   r3   test_base_mamba_config_130m?      rB   c                  C   r5   )	N0MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM0   r8   i   r9   r:   r;   r<   )
r   r   r   r    r=   r>   r&   r?   r@   rA   r0   r2   r2   r3   test_base_mamba_config_370mL   rC   rF   c                  C   r5   )	NrD   rE   r8   i   r9   r:   r;   r<   )
r   r   r   r    r=   r>   r&   r?   r@   rA   r0   r2   r2   r3   test_base_mamba_config_780mY   rC   rG   c                  C      t  } | jdks
J | jdksJ | jdksJ | jdksJ | jdks&J | jdks-J | jdks4J | jdks;J | j	dksBJ d S )NrD   rE   r8   r9   r:   r;   r<   )
r   r   r   r    r=   r>   r&   r?   r@   rA   r0   r2   r2   r3   test_base_mamba_config_1_3bf   rC   rI   c                  C   r5   )	N@MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM@   r8   i 
  r9   r:   r;   r<   )
r   r   r   r    r=   r>   r&   r?   r@   rA   r0   r2   r2   r3   test_base_mamba_config_2_7bs   rC   rL   c                  C   rH   )N8MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM8      r   megatronGPTSentencePieceTokenizerznvidia-pure)
r   r   r   r    r=   r>   r&   r?   r@   rA   r0   r2   r2   r3   test_nvidia_mamba_config_8b   rC   rR   c                  C   s   t  } | jdks
J | jdksJ | jdksJ | jdksJ | jdks&J | jdks-J | jdks4J | jdks;J | j	dksBJ | j
d	ksIJ | jd
ksPJ d S )Nz8M-M-M--M-M*-M-M-M-M--M*-M-M-M-M-M*--M-M-M-M-M*-M--M-M-M-rN   rO   i @         r   rP   rQ   znvidia-hybrid)r   r   r   r    r=   r>   r   num_query_groupsr&   r?   r@   rA   r0   r2   r2   r3   "test_nvidia_mamba_hybrid_config_8b   s   rV   c                  C   sZ  t  } | jdks
J | jdksJ | jdksJ | jdksJ | jdks&J | jdks-J | jdks4J | jdks;J | j	dksBJ | j
d	ksIJ | jd
ksPJ | jdksWJ | jdks^J | jdu seJ | jdkslJ | jdkssJ | jdkszJ | jdu sJ | jdu sJ | jdu sJ | jdu sJ | jdksJ | jdu sJ | jdu sJ d S )N4M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M-4   r   i   p   r   rT   rK   i 0  rS   FtiktokenTiktokenTokenizernvidia-hybrid-nemotronhT   )r   r   r   r    r=   mamba_num_headskv_channelsmamba_num_groupsmamba_state_dimmamba_head_dimr>   r   rU   r&   use_mamba_mem_eff_pathr?   r@   rA   masked_softmax_fusionapply_query_key_layer_scalingpersist_layer_normattention_softmax_in_fp32
vocab_sizefirst_last_layers_bf16is_hybrid_modelr0   r2   r2   r3   test_nemotronh_config_4b   s2   rk   c                  C   s0  t  } | jdks
J | jdksJ | jdksJ | jdksJ | jdks&J | jdks-J | jdks4J | jdks;J | j	d	ksBJ | j
dksIJ | jdksPJ | jd
ksWJ | jdks^J | jdkseJ | jdu slJ | jdu ssJ | jdu szJ | jdu sJ | jdksJ | jdu sJ | jdu sJ d S )NrW   rX   r   rO   rT   r   rK   i T  rS   rZ   r[   r\   TFr]   )r   r   r   r    r=   r`   ra   rb   r>   r   rU   r&   r?   r@   rA   rd   re   rf   rg   rh   ri   rj   r0   r2   r2   r3   test_nemotronh_config_8b   s,   rl   c                  C   0  t  } | jdks
J | jdksJ | jdksJ | jdksJ | jdks&J | jdks-J | jdks4J | jdks;J | j	dksBJ | j
dksIJ | jdksPJ | jd	ksWJ | jd
ks^J | jdkseJ | jdu slJ | jdu ssJ | jdu szJ | jdu sJ | jdksJ | jdu sJ | jdu sJ d S )NzbM-M-M-M-M-M-M-M-M*-M-M-M-M-M-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M-M-M---MM---M-M*-M-M-M-M-M-b   r   rT      rK   i x  r   rZ   r[   r\   TFr]   )r	   r   r   r    r=   r`   ra   rb   r>   r   rU   r&   r?   r@   rA   rd   re   rf   rg   rh   ri   rj   r0   r2   r2   r3   test_nemotronh_config_47b   s0   rp   c                  C   rm   )NzvM-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M-v   r   rT   ro   rK   i   r   rZ   r[   r\   TFr]   )r
   r   r   r    r=   r`   ra   rb   r>   r   rU   r&   r?   r@   rA   rd   re   rf   rg   rh   ri   rj   r0   r2   r2   r3   test_nemotronh_config_56b   s0   rr   N)"nemo.collections.llm.gpt.model.ssmr   r   r   r   r   r   r   r	   r
   r   r   r   r   r   r4   rB   rF   rG   rI   rL   rR   rV   rk   rl   rp   rr   r2   r2   r2   r3   <module>   s   @