o
    wi^                     @   st  d dl mZ d dlmZ d dlZd dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dl m!Z! dZ"dZ#ej$j%e"ddej&ej' fddZ(ej$j%ej)e"d					d)dee* de*de+de+dee* dej,fdd Z-e.d!krd"d#iZ/e-dd$Z0ej1dd%e/d&Z2eje0e2d'd( dS dS )*    )OptionalN)DistributedDataParallelConfig)OptimizerConfig)Qwen2VLImageProcessor)	lightning)llmvlm)AutoTokenizer)tensorboard_logger)Qwen25VLVisionConfig)MegatronCommOverlapCallback)CosineAnnealingScheduler)MegatronOptimizerModule)TimingCallbackqwen25vl_7bzQwen/Qwen2.5-VL-7B-Instructnamereturnc                   C   s   t jtjt tjdS )af  
    Factory function to create a Qwen2.5VL 7B model configuration.

    Returns:
        run.Config[pl.LightningModule]: Configuration for the Qwen2.5VL 7B model.

    Examples:
        CLI usage:
            $ nemo llm finetune model=qwen25vl_7b ...

        Python API usage:
            >>> model_config = model()
            >>> print(model_config)
    )config)runConfigr   Qwen2VLModelQwen2VLConfig r   r   e/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/nemo/collections/vlm/recipes/qwen25vl_7b.pymodel'   s   r   )targetr   default      nonedirr   	num_nodesnum_gpus_per_nodepeft_schemec                 C   s  t jtjdddtjdtdddddddd}t jtddd}t jtj||dd	|t jtj	d
dt t
|gddddd}d}t jtj|ddt ttt tdd}	t jtj|d}
t t}t jtjd|j|jd  |
j|j|jd  d}t jtjfi d|
d|d|ddddddddddddddd dd!dd"dd#dd$dd%dd&d}t jtj|d't ttd(}t jtjddd)}t jtd*d+d,d-ddd.}t jtddd/d0d1}t t||}t jtj|||	tj| |t|d2d3||d4}|S )5a<  
    Create a finetuning recipe for Qwen2.5VL 7B model.

    This function sets up a complete configuration for finetuning, including
    model, trainer, data, logging, optimization, and resumption settings.

    Args:
        dir (Optional[str]): Directory for saving logs and checkpoints.
        name (str): Name of the finetuning run.
        num_nodes (int): Number of compute nodes to use.
        num_gpus_per_node (int): Number of GPUs per node.

    Returns:
        run.Partial: Partial configuration for finetuning.

    Examples:
        CLI usage:
            $ nemo llm finetune --factory qwen25vl_7b

        Python API usage:
            >>> recipe = finetune_recipe(name="qwen25vl_7b_finetune", num_nodes=1)
            >>> print(recipe)

    Note:
        This recipe uses the Mock dataset for finetuning.
    r      T)check_for_nan_in_gradgrad_reduce_in_fp32overlap_grad_reduceoverlap_param_gatheraverage_in_collective)tensor_model_parallel_sizecontext_parallel_sizepipeline_model_parallel_sizepipeline_dtypesequence_parallelddpuse_te_rng_trackernccl)tp_comm_overlaptp_comm_bootstrap_backend
   gpuz
bf16-mixed)	precisiong        r   )r"   devices	max_stepsacceleratorstrategyplugins	callbacksval_check_intervallimit_val_batcheslog_every_n_stepsnum_sanity_val_stepsi    @   )
seq_lengthglobal_batch_sizemicro_batch_size	tokenizerimage_processornum_workers)rC   	mcore_mlp)projector_type
input_sizehidden_sizeffn_hidden_sizelanguage_transformer_configvision_transformer_configvision_projection_configfreeze_language_modelFfreeze_vision_modelenable_cuda_graphr1   gradient_accumulation_fusioncross_entropy_loss_fusionbias_activation_fusionbias_dropout_fusionmasked_softmax_fusionattention_softmax_in_fp32apply_rope_fusionr3   tp_comm_overlap_rs_dgradoverlap_p2p_commz	qwen25-vl)r   model_versionrF   )resume_if_existsresume_ignore_no_checkpointadamg>g?gffffff?)	optimizerlr
adam_beta1
adam_beta2use_distributed_optimizerbf16i  gHz>)r9   warmup_stepsconstant_stepsmin_lrr   )r!   r   r
   )r   trainerdatalogoptimresume) r   r   nlMegatronStrategytorchbfloat16r   r   TrainerMegatronMixedPrecisionr   r   Qwen2VLMockDataModuler	   HF_MODEL_NAMEr   r   Qwen25Config7Br   MultimodalProjectorConfigrL   spatial_merge_sizer   r   
AutoResumer   r   r   Partialfinetunedefault_logr
   )r!   r   r"   r#   r$   r;   tp_comm_overlap_callbackrj   max_sequence_lengthrk   rN   rO   rP   qwen25vl_configr   nemo_resume
opt_configschedoptreciper   r   r   finetune_recipe;   s   $

		

		r   __main__CUDA_VISIBLE_DEVICESz0,1,2,3,4,5,6,7)r#   torchrun)ntasks_per_nodelauncherenv_varszqwen2.5vl_7b_finetune)executorr   )Nr   r   r   r    )3typingr   lightning.pytorchpytorchplnemo_runr   rq   megatron.core.distributedr   megatron.core.optimizerr   6transformers.models.qwen2_vl.image_processing_qwen2_vlr   nemor   ro   nemo.collectionsr   r   =nemo.collections.common.tokenizers.huggingface.auto_tokenizerr	   (nemo.collections.llm.recipes.log.defaultr
   "nemo.collections.vlm.qwen2vl.modelr   6nemo.lightning.pytorch.callbacks.megatron_comm_overlapr   nemo.lightning.pytorch.optimr   %nemo.lightning.pytorch.optim.megatronr   nemo.utils.exp_managerr   NAMErv   clifactoryr   LightningModuler   r|   strintr{   r   __name__r   r   LocalExecutorr   r   r   r   r   <module>   s^    
