o
    	Ti                  	   @   s  d Z ddlZddlmZ ddlmZmZ ddlmZm	Z	m
Z
mZmZmZmZmZ edkreeee
fZe \ZZZejdv rBejneeejZeeZeejejeedurYe ndedZejejfd	ejieZ eeZ!e!du rejejfd	ejieZ"ndZ"ejejejd
dZ#e#j$Z$e j%j&dkrne j%j&dkrde#_'n	e j%j&dkrde#_'e$j(du re$j)e$_(ej*rdd e + D e _,eej-ej.ej/dZ0e	e e"ee0ej1 ej2dkre0ej3 nde#e!dZ4e45  e46ej7 ej8re4j8ej-d dS dS dS )a  
Without dataset streaming:

```
accelerate launch examples/scripts/dpo_vlm.py     --dataset_name HuggingFaceH4/rlaif-v_formatted     --model_name_or_path Qwen/Qwen2.5-VL-3B-Instruct     --per_device_train_batch_size 2     --gradient_accumulation_steps 32     --dataset_num_proc 32     --output_dir dpo_idefics_rlaif-v     --torch_dtype bfloat16     --gradient_checkpointing     --use_peft     --lora_target_modules=all-linear     --report_to wandb
```

With dataset streaming:

```
accelerate launch examples/scripts/dpo_vlm.py     --dataset_name HuggingFaceH4/rlaif-v_formatted     --dataset_streaming     --model_name_or_path Qwen/Qwen2.5-VL-3B-Instruct     --per_device_train_batch_size 2     --max_steps 100     --gradient_accumulation_steps 32     --dataset_num_proc 32     --output_dir dpo_idefics_rlaif-v     --torch_dtype bfloat16     --gradient_checkpointing     --use_peft     --lora_target_modules=all-linear     --report_to wandb
```
    N)load_dataset)AutoModelForVision2SeqAutoProcessor)	DPOConfig
DPOTrainerModelConfigScriptArguments	TrlParserget_kbit_device_mapget_peft_configget_quantization_config__main__)autoN)revisionattn_implementationtorch_dtype
device_mapquantization_configtrust_remote_codeF)r   do_image_splittingidefics2	paligemmaa  {% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}<|im_start|>{% if message['role'] == 'user' %}USER: {% else %}ASSISTANT: {% endif %}{% for item in message['content'] if item['type'] == 'text' %}{{ item['text'] }}<|im_end|>{% endfor %}{% if message['role'] == 'user' %} {% else %}{{eos_token}}{% endif %}{% endfor %}{% if add_generation_prompt %}ASSISTANT: {% endif %}llavaa  {% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{% if message['role'] == 'user' %}USER: {% else %}ASSISTANT: {% endif %}{% for item in message['content'] %}{% if item['type'] == 'text' %}{{ item['text'] }}{% elif item['type'] == 'image' %}<image>{% endif %}{% endfor %}{% if message['role'] == 'user' %} {% else %}{{eos_token}}{% endif %}{% endfor %}{% if add_generation_prompt %}ASSISTANT: {% endif %}c                 C   s    g | ]\}}|j tjkr|qS  )dtypetorchbool).0namebufferr   r   L/home/ubuntu/.local/lib/python3.10/site-packages/examples/scripts/dpo_vlm.py
<listcomp>   s    r!   )r   	streamingno)argstrain_dataseteval_datasetprocessing_classpeft_config)dataset_name)9__doc__r   datasetsr   transformersr   r   trlr   r   r   r   r	   r
   r   r   __name__parserparse_args_and_configscript_argstraining_args
model_argsr   getattrr   dictmodel_revisionr   model_kwargsfrom_pretrainedmodel_name_or_pathr   modelr(   	ref_model	processor	tokenizerconfig
model_typechat_template	pad_token	eos_tokenignore_bias_buffersnamed_buffers!_ddp_params_and_buffers_to_ignorer)   dataset_configdataset_streamingdatasetdataset_train_spliteval_strategydataset_test_splittrainertrain
save_model
output_dirpush_to_hubr   r   r   r    <module>   s   &(

	
P