o
    wi                     @   s  d dl mZ d dlmZ d dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ ddgZdejdeddfddZ dej!dej!fddZ"				d0dede#de#dede#defddZ$	 	 			 	 	 	!				d1d"ed#e%d$e%d%e%dB d&e%dB d'e%d(e%d)e%d*e#d+e#d,e&dB d-e&dB d.e&dB de'ej(ej)f fd/dZ*dS )2    )partialN)StrictHandling)get_gpt_modelopt_spec)
ModuleSpec)TransformerConfig)	lightning)vlm)get_llama4_layer_spec) _setup_trainer_and_restore_model)ckpt_to_context_subdir)ckpt_to_weights_subdir)logging#set_modelopt_spec_if_exists_in_ckpt2setup_trainer_and_restore_model_with_modelopt_specmodelpathreturnc                 C   sn   t |d}t|ddd }| rt| drdS t| tjr+t| j	 d| j	_
dS tt|  d dS )zdSet model.config.transformer_layer_spec to modelopt spec if modelopt_state exists in the checkpoint.znemo://F)	is_savingmodelopt_statemoduleNz= is not a Llama4OmniModel. Modelopt state will not be loaded.)strremoveprefixr   existshasattr
isinstancer   Llama4OmniModel_set_llama4_modelopt_specconfiggradient_accumulation_fusionr   warningtype)r   r   modelopt_state_path r"   f/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/nemo/collections/vlm/modelopt/model_utils.pyr   $   s   
	model_cfgc                 C   s<   t d t| tjrttdd| j_| S t	dt
|  )af  
    Set the model layer spec to a modelopt spec variant. This function updates the model
    config with the appropriate modelopt layer specification based on the model type.

    Args:
        model_cfg (vlm.Llama4OmniConfig): The model config.

    Returns:
        vlm.Llama4OmniConfig: The model config updated for the modelopt layer specification.
    z<Setting model layer specification to the modelopt layer specT)remap_te_layernormz1No modelopt layer spec supported for config type )r   infor   r   Llama4OmniConfigr   _get_llama4_modelopt_speclanguage_transformer_configtransformer_layer_spec
ValueErrorr    )r$   r"   r"   r#   r   4   s   

r   FNoner   local_core_attentionr%   real_quant_cfg
qk_l2_normc                 C   s    t | ||||}t| |d}|S )z@Set model.config.transformer_layer_spec to llama4_modelopt_spec.)gpt_decoder_block_spec)r   r	   )r   r-   r%   r.   r/   r0   llama4_layer_specr"   r"   r#   r(   J   s
   
r(      T
model_pathtensor_model_parallel_sizepipeline_model_parallel_size"num_layers_in_first_pipeline_stage!num_layers_in_last_pipeline_stageexpert_model_parallel_sizedevices	num_nodesinference_onlylegacy_ckptstrategy_kwargstrainer_kwargsmodel_config_overridesc                 C   sb  |
du ri }
|du ri }|du ri }t d|  d |r6tjd|||tjdddd|	r.tjndd	|
}ntjd|||tj|	rCtjndd|
}tjd||d|tj	d	tjdd
dd|}tj
jt| dd}t|j | D ]\}}t d| d|  t|j|| qs|r|`|r||j_|r||j_t| || d|j_t d| d ||fS )a  Loads a GPT model from a NeMo 2.0 checkpoint using modelopt layer spec.

    Args:
        model_path (str): Path to the NeMo checkpoint.
        tensor_model_parallel_size (int): Size of the tensor model parallelism.
        pipeline_model_parallel_size (int): Size of the pipeline model parallelism.
        num_layers_in_first_pipeline_stage (int): Number of layers in the first pipeline stage.
        num_layers_in_last_pipeline_stage (int): Number of layers in the last pipeline stage.
        expert_model_parallel_size (int): Size of the expert model parallelism.
        devices (int): Number of devices on each node.
        num_nodes (int): Number of nodes being used.
        inference_only (bool): If True, loads the model for inference only w/o initializing the optimizer.
        legacy_ckpt (bool): If True, allow loading ckpt saved with older version of TE.
        strategy_kwargs (Optional[dict]): Additional keyword arguments for nl.MegatronStrategy.
        trainer_kwargs (Optional[dict]): Additional keyword arguments for nl.Trainer.
        model_config_overrides (Optional[dict]): keyword arguments to override model config.

    Returns:
        Union[vlm.Llama4OmniModel]: The loaded model with the specified configuration.
    NzLoading model from z with modelopt layer spec...Fpytorch)	r4   r5   r8   pipeline_dtypeckpt_load_optimizerckpt_parallel_save_optimsetup_optimizersddpckpt_load_strictness)r4   r5   r8   rA   rF   gpuz
bf16-mixedT)	precisionparams_dtypeautocast_enabledgrad_reduce_in_fp32)r9   r:   acceleratorstrategypluginsr   )r   subpathzOverriding model.config.z to zLoaded model: 
r"   )r   r&   nlMegatronStrategytorchbfloat16r   LOG_ALLTrainerMegatronMixedPrecisionioload_contextr   r   r   itemssetattroptimr6   r7   r
   rM   restore_config)r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   rM   trainerr   kvr"   r"   r#   r   Y   sp   #
	

)FFr,   F)r2   r2   NNr2   r2   r2   TFNNN)+	functoolsr   lightning.pytorchr@   LrS   +megatron.core.dist_checkpointing.validationr   4megatron.core.post_training.modelopt.gpt.model_specsr   $megatron.core.transformer.spec_utilsr   ,megatron.core.transformer.transformer_configr   nemor   rQ   nemo.collectionsr   +nemo.collections.llm.gpt.model.llama4_utilsr	   #nemo.collections.vlm.inference.baser
   nemo.lightning.ckpt_utilsr   nemo.lightning.io.plr   
nemo.utilsr   __all__LightningModuler   r   r'   r   boolr(   intdicttupler   rV   r   r"   r"   r"   r#   <module>   s   
	
