o
    	Ti                  	   @   s  d Z ddlZddlmZ ddlmZ ddlmZmZ ddl	m
Z
mZmZmZmZmZmZmZ edkreee
efZe \ZZZejdv rHejneeejZeeZeejejejeedurae nded	Zej ej!fi eZ"eeZ#e#du rej ej!fi eZ$ndZ$ej ej!ejd
Z%eej&ej'ej(dZ)e)ej* Z+ej,dkre)ej- ndZ.dd Z/e+j0e/ej1dZ+e.dure.j0e/ej1dZ.ee"e$ee+e.e%e#dZ2e23  e24ej5 ej6re2j6ej&d dS dS dS )aA  
python examples/scripts/mpo_vlm.py     --dataset_name HuggingFaceH4/rlaif-v_formatted     --model_name_or_path Qwen/Qwen2.5-VL-3B-Instruct     --per_device_train_batch_size 4     --per_device_eval_batch_size 4     --num_train_epochs 1     --gradient_accumulation_steps 8     --dataset_num_proc 1     --output_dir dpo_idefics_rlaif-v     --torch_dtype bfloat16     --gradient_checkpointing     --use_peft     --lora_target_modules down_proj, o_proj, k_proj, q_proj, gate_proj, up_proj, v_proj     --loss_type sigmoid bco_pair sft     --loss_weights 0.8 0.2 1.0     --bf16 True
    N)load_dataset)Image)AutoModelForVision2SeqAutoProcessor)	DPOConfig
DPOTrainerModelConfigScriptArguments	TrlParserget_kbit_device_mapget_peft_configget_quantization_config__main__)autoN)trust_remote_coderevisionattn_implementationtorch_dtype
device_mapquantization_config)r   )name	streamingnoc                 C   s:   | d d }t |tjr|jdkr|d}|g| d< | S )Nimagesr   RGB)
isinstancer   modeconvert)exampleimage r    L/home/ubuntu/.local/lib/python3.10/site-packages/examples/scripts/mpo_vlm.py
ensure_rgbi   s   


r"   )num_proc)model	ref_modelargstrain_dataseteval_datasetprocessing_classpeft_config)dataset_name)7__doc__torchdatasetsr   PILr   transformersr   r   trlr   r   r   r	   r
   r   r   r   __name__parserparse_args_and_configscript_argstraining_args
model_argsr   getattrr   dictr   model_revisionr   model_kwargsfrom_pretrainedmodel_name_or_pathr$   r*   r%   	processorr+   dataset_configdataset_streamingdatasetdataset_train_splitr'   eval_strategydataset_test_splittest_datasetr"   mapdataset_num_proctrainertrain
save_model
output_dirpush_to_hubr    r    r    r!   <module>   s|   (


N