o
    wih                     @   st  d dl mZ d dlmZ d dlZd dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dl m!Z! dZ"dZ#ej$j%e"ddej&ej' fddZ(ej$j%ej)e"d					d)dee* de*de+de+dee* dej,fdd Z-e.d!krd"d#iZ/e-dd$Z0ej1dd%e/d&Z2eje0e2d'd( dS dS )*    )OptionalN)DistributedDataParallelConfig)OptimizerConfig)Qwen2VLImageProcessor)	lightning)llmvlm)AutoTokenizer)tensorboard_logger)Qwen25VLVisionConfig)MegatronCommOverlapCallback)CosineAnnealingScheduler)MegatronOptimizerModule)TimingCallbackqwen25vl_32bzQwen/Qwen2.5-VL-32B-Instructnamereturnc                   C   s   t jtjt tjdS )ai  
    Factory function to create a Qwen2.5VL 32B model configuration.

    Returns:
        run.Config[pl.LightningModule]: Configuration for the Qwen2.5VL 32B model.

    Examples:
        CLI usage:
            $ nemo llm finetune model=qwen25vl_32b ...

        Python API usage:
            >>> model_config = model()
            >>> print(model_config)
    )config)runConfigr   Qwen2VLModelQwen2VLConfig r   r   f/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/nemo/collections/vlm/recipes/qwen25vl_32b.pymodel'   s   r   )targetr   default      nonedirr   	num_nodesnum_gpus_per_nodepeft_schemec                 C   s  t jtjdddtjdtdddddddd}t jtddd}t jtj||d	d
|t jtj	ddt t
|gd	dddd}d}t jtj|ddt ttt tdd}	t jtj|d}
t t}t jtjd|j|jd  |
j|j|jd  d}t jtjfi d|
d|d|ddddddddddddd dd!dd"dd#dd$dd%dd&dd'd}t jtj|d(t ttd)}t jtjddd*}t jtd+d,d-d.ddd/}t jtd	dd0d1d2}t t||}t jtj|||	tj| |t|d3d4||d5}|S )6a?  
    Create a finetuning recipe for Qwen2.5VL 32B model.

    This function sets up a complete configuration for finetuning, including
    model, trainer, data, logging, optimization, and resumption settings.

    Args:
        dir (Optional[str]): Directory for saving logs and checkpoints.
        name (str): Name of the finetuning run.
        num_nodes (int): Number of compute nodes to use.
        num_gpus_per_node (int): Number of GPUs per node.

    Returns:
        run.Partial: Partial configuration for finetuning.

    Examples:
        CLI usage:
            $ nemo llm finetune --factory qwen25vl_32b

        Python API usage:
            >>> recipe = finetune_recipe(name="qwen25vl_32b_finetune", num_nodes=1)
            >>> print(recipe)

    Note:
        This recipe uses the Mock dataset for finetuning.
    r         T)check_for_nan_in_gradgrad_reduce_in_fp32overlap_grad_reduceoverlap_param_gatheraverage_in_collective)tensor_model_parallel_sizecontext_parallel_sizepipeline_model_parallel_sizepipeline_dtypesequence_parallelddpuse_te_rng_trackernccl)tp_comm_overlaptp_comm_bootstrap_backend
   gpuz
bf16-mixed)	precisiong        r   )r"   devices	max_stepsacceleratorstrategyplugins	callbacksval_check_intervallimit_val_batcheslog_every_n_stepsnum_sanity_val_stepsi    @   )
seq_lengthglobal_batch_sizemicro_batch_size	tokenizerimage_processornum_workers)rD   	mcore_mlp)projector_type
input_sizehidden_sizeffn_hidden_sizelanguage_transformer_configvision_transformer_configvision_projection_configfreeze_language_modelFfreeze_vision_modelenable_cuda_graphr2   gradient_accumulation_fusioncross_entropy_loss_fusionbias_activation_fusionbias_dropout_fusionmasked_softmax_fusionattention_softmax_in_fp32apply_rope_fusionr4   tp_comm_overlap_rs_dgradoverlap_p2p_commz	qwen25-vl)r   model_versionrG   )resume_if_existsresume_ignore_no_checkpointadamg>g?gffffff?)	optimizerlr
adam_beta1
adam_beta2use_distributed_optimizerbf16i  gHz>)r:   warmup_stepsconstant_stepsmin_lrr   )r!   r   r
   )r   trainerdatalogoptimresume) r   r   nlMegatronStrategytorchbfloat16r   r   TrainerMegatronMixedPrecisionr   r   Qwen2VLMockDataModuler	   HF_MODEL_NAMEr   r   Qwen25Config32Br   MultimodalProjectorConfigrM   spatial_merge_sizer   r   
AutoResumer   r   r   Partialfinetunedefault_logr
   )r!   r   r"   r#   r$   r<   tp_comm_overlap_callbackrk   max_sequence_lengthrl   rO   rP   rQ   qwen25vl_configr   nemo_resume
opt_configschedoptreciper   r   r   finetune_recipe;   s   $

		

		r   __main__CUDA_VISIBLE_DEVICESz0,1,2,3,4,5,6,7)r#   torchrun)ntasks_per_nodelauncherenv_varszqwen2.5vl_32b_finetune)executorr   )Nr   r   r   r    )3typingr   lightning.pytorchpytorchplnemo_runr   rr   megatron.core.distributedr   megatron.core.optimizerr   6transformers.models.qwen2_vl.image_processing_qwen2_vlr   nemor   rp   nemo.collectionsr   r   =nemo.collections.common.tokenizers.huggingface.auto_tokenizerr	   (nemo.collections.llm.recipes.log.defaultr
   "nemo.collections.vlm.qwen2vl.modelr   6nemo.lightning.pytorch.callbacks.megatron_comm_overlapr   nemo.lightning.pytorch.optimr   %nemo.lightning.pytorch.optim.megatronr   nemo.utils.exp_managerr   NAMErw   clifactoryr   LightningModuler   r}   strintr|   r   __name__r   r   LocalExecutorr   r   r   r   r   <module>   s^    
