# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from nemo.collections.llm.gpt.model.baichuan import Baichuan2Config, Baichuan2Config7B, Baichuan2Model
from nemo.collections.llm.gpt.model.base import (
    GPTConfig,
    GPTConfig5B,
    GPTConfig7B,
    GPTConfig20B,
    GPTConfig40B,
    GPTConfig126M,
    GPTConfig175B,
    GPTModel,
    MaskedTokenLossReduction,
    gpt_data_step,
    gpt_forward_step,
    local_layer_spec,
    transformer_engine_full_layer_spec,
    transformer_engine_layer_spec,
)
from nemo.collections.llm.gpt.model.chatglm import ChatGLM2Config6B, ChatGLM3Config6B, ChatGLMConfig, ChatGLMModel
from nemo.collections.llm.gpt.model.deepseek import (
    DeepSeekModel,
    DeepSeekV2Config,
    DeepSeekV2LiteConfig,
    DeepSeekV3Config,
)
from nemo.collections.llm.gpt.model.gemma import (
    CodeGemmaConfig2B,
    CodeGemmaConfig7B,
    GemmaConfig,
    GemmaConfig2B,
    GemmaConfig7B,
    GemmaModel,
)
from nemo.collections.llm.gpt.model.gemma2 import (
    Gemma2Config,
    Gemma2Config2B,
    Gemma2Config9B,
    Gemma2Config27B,
    Gemma2Model,
)
from nemo.collections.llm.gpt.model.gemma3 import (
    Gemma3Config,
    Gemma3Config1B,
    Gemma3Config4B,
    Gemma3Config12B,
    Gemma3Config27B,
    Gemma3Model,
)
from nemo.collections.llm.gpt.model.hf_auto_model_for_causal_lm import HFAutoModelForCausalLM
from nemo.collections.llm.gpt.model.hf_llama_embedding import get_llama_bidirectional_hf_model
from nemo.collections.llm.gpt.model.hyena import (
    Hyena1bConfig,
    Hyena7bARCLongContextConfig,
    Hyena7bConfig,
    Hyena40bARCLongContextConfig,
    Hyena40bConfig,
    HyenaConfig,
    HyenaModel,
    HyenaNV1bConfig,
    HyenaNV7bConfig,
    HyenaNV40bConfig,
    HyenaNVTestConfig,
    HyenaTestConfig,
)
from nemo.collections.llm.gpt.model.llama import (
    CodeLlamaConfig7B,
    CodeLlamaConfig13B,
    CodeLlamaConfig34B,
    CodeLlamaConfig70B,
    Llama2Config7B,
    Llama2Config13B,
    Llama2Config70B,
    Llama3Config8B,
    Llama3Config70B,
    Llama4Config,
    Llama4Experts16Config,
    Llama4Experts128Config,
    Llama31Config8B,
    Llama31Config70B,
    Llama31Config405B,
    Llama32Config1B,
    Llama32Config3B,
    LlamaConfig,
    LlamaModel,
    MLPerfLoRALlamaModel,
)
from nemo.collections.llm.gpt.model.llama_embedding import (
    Llama32EmbeddingConfig1B,
    Llama32EmbeddingConfig3B,
    LlamaEmbeddingModel,
)
from nemo.collections.llm.gpt.model.llama_nemotron import (
    Llama31Nemotron70BConfig,
    Llama31NemotronNano8BConfig,
    Llama31NemotronUltra253BConfig,
    Llama33NemotronSuper49BConfig,
    LlamaNemotronModel,
)
from nemo.collections.llm.gpt.model.mistral import MistralConfig7B, MistralModel, MistralNeMoConfig12B
from nemo.collections.llm.gpt.model.mixtral import (
    MixtralConfig,
    MixtralConfig8x3B,
    MixtralConfig8x7B,
    MixtralConfig8x22B,
    MixtralModel,
)
from nemo.collections.llm.gpt.model.nemotron import (
    Nemotron3Config4B,
    Nemotron3Config8B,
    Nemotron3Config22B,
    Nemotron4Config15B,
    Nemotron4Config340B,
    NemotronConfig,
    NemotronModel,
)
from nemo.collections.llm.gpt.model.phi3mini import Phi3Config, Phi3ConfigMini, Phi3Model
from nemo.collections.llm.gpt.model.qwen2 import (
    Qwen2Config,
    Qwen2Config1P5B,
    Qwen2Config7B,
    Qwen2Config72B,
    Qwen2Config500M,
    Qwen2Model,
    Qwen25Config1P5B,
    Qwen25Config3B,
    Qwen25Config7B,
    Qwen25Config14B,
    Qwen25Config32B,
    Qwen25Config72B,
    Qwen25Config500M,
)
from nemo.collections.llm.gpt.model.qwen3 import (
    Qwen3Config,
    Qwen3Config1P7B,
    Qwen3Config4B,
    Qwen3Config8B,
    Qwen3Config14B,
    Qwen3Config30B_A3B,
    Qwen3Config32B,
    Qwen3Config235B_A22B,
    Qwen3Config600M,
    Qwen3Model,
)
from nemo.collections.llm.gpt.model.reranker import Llama32Reranker1BConfig, Llama32Reranker500MConfig, ReRankerModel
from nemo.collections.llm.gpt.model.ssm import (
    BaseMambaConfig1_3B,
    BaseMambaConfig2_7B,
    BaseMambaConfig130M,
    BaseMambaConfig370M,
    BaseMambaConfig780M,
    MambaModel,
    NemotronHConfig4B,
    NemotronHConfig8B,
    NemotronHConfig47B,
    NemotronHConfig56B,
    NemotronNano9Bv2,
    NemotronNano12Bv2,
    NVIDIAMambaConfig8B,
    NVIDIAMambaHybridConfig8B,
    SSMConfig,
)
from nemo.collections.llm.gpt.model.starcoder import StarcoderConfig, StarcoderConfig15B, StarcoderModel
from nemo.collections.llm.gpt.model.starcoder2 import (
    Starcoder2Config,
    Starcoder2Config3B,
    Starcoder2Config7B,
    Starcoder2Config15B,
    Starcoder2Model,
)

# Names exported by `from <this module> import *`. Every symbol imported at the
# top of this file is listed here exactly once; when adding a new import above,
# add the matching entry below. The section comments name the submodule of
# `nemo.collections.llm.gpt.model` that each group of names is imported from.
__all__ = [
    # base (configs and model class; the step functions, loss reduction and
    # layer specs from the same module are listed further down)
    "GPTConfig",
    "GPTConfig5B",
    "GPTConfig7B",
    "GPTConfig20B",
    "GPTConfig40B",
    "GPTConfig126M",
    "GPTConfig175B",
    "GPTModel",
    # mistral
    "MistralConfig7B",
    "MistralModel",
    "MistralNeMoConfig12B",
    # mixtral
    "MixtralConfig8x3B",
    "MixtralConfig8x7B",
    "MixtralConfig8x22B",
    "MixtralConfig",
    "MixtralModel",
    # starcoder2
    "Starcoder2Config",
    "Starcoder2Model",
    "Starcoder2Config15B",
    "Starcoder2Config7B",
    "Starcoder2Config3B",
    # starcoder
    "StarcoderConfig",
    "StarcoderConfig15B",
    "StarcoderModel",
    # llama (CodeLlama configs and the model classes from the same module
    # are listed further down)
    "LlamaConfig",
    "Llama2Config7B",
    "Llama2Config13B",
    "Llama2Config70B",
    "Llama3Config8B",
    "Llama3Config70B",
    "Llama31Config8B",
    "Llama31Config70B",
    "Llama31Config405B",
    "Llama32Config1B",
    "Llama32Config3B",
    "Llama4Experts16Config",
    "Llama4Experts128Config",
    "Llama4Config",
    # llama_nemotron
    "LlamaNemotronModel",
    "Llama31NemotronNano8BConfig",
    "Llama33NemotronSuper49BConfig",
    "Llama31NemotronUltra253BConfig",
    "Llama31Nemotron70BConfig",
    # reranker (configs; ReRankerModel is listed further down)
    "Llama32Reranker1BConfig",
    "Llama32Reranker500MConfig",
    # nemotron
    "NemotronConfig",
    "Nemotron3Config4B",
    "Nemotron3Config8B",
    "Nemotron4Config15B",
    "Nemotron3Config22B",
    "Nemotron4Config340B",
    "NemotronModel",
    # llama_embedding
    "LlamaEmbeddingModel",
    "Llama32EmbeddingConfig1B",
    "Llama32EmbeddingConfig3B",
    # phi3mini
    "Phi3Config",
    "Phi3ConfigMini",
    "Phi3Model",
    # llama (CodeLlama configs)
    "CodeLlamaConfig7B",
    "CodeLlamaConfig13B",
    "CodeLlamaConfig34B",
    "CodeLlamaConfig70B",
    # gemma
    "GemmaConfig",
    "GemmaConfig2B",
    "GemmaConfig7B",
    "CodeGemmaConfig2B",
    "CodeGemmaConfig7B",
    "GemmaModel",
    # gemma2
    "Gemma2Config",
    "Gemma2Config27B",
    "Gemma2Config2B",
    "Gemma2Config9B",
    "Gemma2Model",
    # gemma3
    "Gemma3Config",
    "Gemma3Config1B",
    "Gemma3Config4B",
    "Gemma3Config12B",
    "Gemma3Config27B",
    "Gemma3Model",
    # llama (model classes)
    "LlamaModel",
    "MLPerfLoRALlamaModel",
    # baichuan
    "Baichuan2Config",
    "Baichuan2Config7B",
    "Baichuan2Model",
    # chatglm
    "ChatGLMConfig",
    "ChatGLM2Config6B",
    "ChatGLM3Config6B",
    "ChatGLMModel",
    # qwen2 (Qwen2 and Qwen2.5 names are interleaved here)
    "Qwen2Config",
    "Qwen2Config500M",
    "Qwen2Config1P5B",
    "Qwen25Config3B",
    "Qwen2Config7B",
    "Qwen2Config72B",
    "Qwen25Config72B",
    "Qwen25Config32B",
    "Qwen25Config14B",
    "Qwen25Config7B",
    "Qwen25Config500M",
    "Qwen25Config1P5B",
    "Qwen2Model",
    # qwen3
    "Qwen3Config",
    "Qwen3Config600M",
    "Qwen3Config1P7B",
    "Qwen3Config4B",
    "Qwen3Config8B",
    "Qwen3Config14B",
    "Qwen3Config32B",
    "Qwen3Config30B_A3B",
    "Qwen3Config235B_A22B",
    "Qwen3Model",
    # reranker (model class)
    "ReRankerModel",
    # ssm
    "SSMConfig",
    "BaseMambaConfig130M",
    "BaseMambaConfig370M",
    "BaseMambaConfig780M",
    "BaseMambaConfig1_3B",
    "BaseMambaConfig2_7B",
    "NVIDIAMambaConfig8B",
    "NVIDIAMambaHybridConfig8B",
    "NemotronHConfig4B",
    "NemotronHConfig8B",
    "NemotronHConfig47B",
    "NemotronHConfig56B",
    "NemotronNano9Bv2",
    "NemotronNano12Bv2",
    "MambaModel",
    # deepseek
    "DeepSeekModel",
    "DeepSeekV2Config",
    "DeepSeekV2LiteConfig",
    "DeepSeekV3Config",
    # base (loss reduction, step functions and layer specs)
    "MaskedTokenLossReduction",
    "gpt_data_step",
    "gpt_forward_step",
    "transformer_engine_layer_spec",
    "transformer_engine_full_layer_spec",
    "local_layer_spec",
    # hf_auto_model_for_causal_lm
    "HFAutoModelForCausalLM",
    # hf_llama_embedding
    "get_llama_bidirectional_hf_model",
    # hyena
    "HyenaTestConfig",
    "Hyena1bConfig",
    "HyenaNV1bConfig",
    "Hyena7bConfig",
    "Hyena40bConfig",
    "Hyena7bARCLongContextConfig",
    "Hyena40bARCLongContextConfig",
    "HyenaNVTestConfig",
    "HyenaNV40bConfig",
    "HyenaNV7bConfig",
    "HyenaConfig",
    "HyenaModel",
]
