o
    	TiA!                     @   s   d Z ddlZddlZddlZddlZddlmZmZ ddlm	Z	m
Z
 ddlmZ ddlmZmZ ddlmZmZmZmZmZmZmZmZ dee d	eej fd
dZdeeef d	eeef fddZdededed	efddZdd Z e!dkr}e   dS dS )ak  
Train Gemma-3 on the HuggingFaceH4/llava-instruct-mix-vsft dataset (single-image).

accelerate launch     --config_file examples/accelerate_configs/deepspeed_zero3.yaml     examples/scripts/sft_vlm_gemma3.py     --dataset_name HuggingFaceH4/llava-instruct-mix-vsft     --model_name_or_path google/gemma-3-4b-it     --per_device_train_batch_size 1     --gradient_accumulation_steps 1     --output_dir gemma-3-4b-it-trl-sft-llava-instruct-mix-vsft     --bf16     --torch_dtype bfloat16     --use_peft     --lora_target_modules all-linear     --attn_implementation eager

Train Gemma-3 on the FanqingM/MMIU-Benchmark dataset (multi-image).

accelerate launch     --config_file examples/accelerate_configs/deepspeed_zero3.yaml     examples/scripts/sft_vlm_gemma3.py     --dataset_name FanqingM/MMIU-Benchmark     --dataset_train_split test     --model_name_or_path google/gemma-3-4b-it     --per_device_train_batch_size 1     --gradient_accumulation_steps 1     --output_dir gemma-3-4b-it-trl-sft-MMIU-Benchmark     --bf16     --torch_dtype bfloat16     --use_peft     --lora_target_modules all-linear
    --attn_implementation eager
    N)DatasetDictload_dataset)hf_hub_downloadlist_repo_files)Image)AutoModelForImageTextToTextAutoProcessor)ModelConfigScriptArguments	SFTConfig
SFTTrainer	TrlParserget_kbit_device_mapget_peft_configget_quantization_configmessagesreturnc                 C   s   g }| D ]F}| dg }t|ts|g}|D ]3}t|trId|v s(| ddkrId|v r1|d }n|}|d urItt|d }||	d qq|S )NcontentimagetypebytesRGB)
get
isinstancelistdictr   openioBytesIOappendconvert)r   image_inputsmsgr   elementr    r$   S/home/ubuntu/.local/lib/python3.10/site-packages/examples/scripts/sft_vlm_gemma3.pyprocess_vision_infoP   s    
 
	r&   samplesc           	      C   s&  dg i}t t| d D ]}g }| d | D ]L}z.t|d}| }W d    n1 s-w   Y  tt|d}|d|d W q t	yb } zt
d| d	|  W Y d }~qd }~ww |d d
d| d | dgdd|d| d | dg ddd| d | dgdg q|S )Nr   questioninput_image_pathrbr   r   )r   r   zError processing image z: systemtextcontext)r   r,   )roler   user	assistantoutput)rangelenr   readr   r   r   r    r   	Exceptionprint)	r'   formatted_samplescontimagesimg_pathf	img_bytesr   er$   r$   r%   format_datac   s,   
r>   datasetdataset_namedataset_train_splitc           	   	   C   s   t |dd}dd |D }|D ]1}t||dd}|dd}tj|dd	 t|d
}|| W d    n1 s;w   Y  q| jt	dddd} | S )Nr?   )	repo_typec                 S   s   g | ]	}| d r|qS ).zip)endswith).0r;   r$   r$   r%   
<listcomp>~   s    z#prepare_dataset.<locals>.<listcomp>)repo_idfilenamerB   rC    T)exist_okr      )batched
batch_sizenum_proc)
r   r   replaceosmakedirszipfileZipFile
extractallmapr>   )	r?   r@   rA   	all_files	zip_fileszip_filenamezip_pathextract_folderzip_refr$   r$   r%   prepare_dataset|   s   r^   c               	      sf  t tttf} |  \}}}tdd|_d|_ddi|_|j	dv r$|j	nt
t|j	}t|}t|j|j||d ur;t nd |d}tj|j|jd d j_tj|jfd	|ji|} fd
d}t|j|jd}	|jdkrut|	|j|j}	t||||	|j |jdkr|	|j nd  t|d}
|
   |
!|j" |j#r|
j#|jd |
j$j%r #|j& d S d S d S )NF)use_reentrantskip_prepare_datasetT)autoN)revisionattn_implementationtorch_dtype
device_mapquantization_config)trust_remote_coderightrg   c                    s    fdd| D }d| d v rdd | D }ndd | D } ||ddd	}|d
   } j jjd g}d|| jjk< d|||k< d||dk< ||d< |S )Nc                    s$   g | ]} j |d  ddd qS )r   F)tokenizeadd_generation_prompt)apply_chat_templatestriprE   example	processorr$   r%   rF      s    z,main.<locals>.collate_fn.<locals>.<listcomp>r9   r   c                 S   s   g | ]}d d |d D qS )c                 S   s   g | ]}| d qS )r   )r    )rE   imgr$   r$   r%   rF      s    z7main.<locals>.collate_fn.<locals>.<listcomp>.<listcomp>r9   r$   rm   r$   r$   r%   rF      s    c                 S   s   g | ]}t |d  qS )r   )r&   rm   r$   r$   r%   rF      s    ptT)r9   r,   return_tensorspadding	input_ids	boi_tokenii   labels)clone	tokenizerconvert_tokens_to_idsspecial_tokens_mappad_token_id)examplestextsr9   batchrw   image_token_idro   r$   r%   
collate_fn   s"   
zmain.<locals>.collate_fn)namezFanqingM/MMIU-Benchmarkno)modelargsdata_collatortrain_dataseteval_datasetprocessing_classpeft_config)r@   )'r   r
   r   r	   parse_args_and_configr   gradient_checkpointing_kwargsremove_unused_columnsdataset_kwargsrd   getattrtorchr   model_revisionrc   r   r   from_pretrainedmodel_name_or_pathrg   ry   padding_sider   r   r@   dataset_configr^   rA   r   eval_strategydataset_test_splitr   train
save_model
output_dirpush_to_hubacceleratoris_main_processhub_model_id)parserscript_argstraining_args
model_argsrd   rf   model_kwargsr   r   r?   trainerr$   ro   r%   main   s^   
 

r   __main__)"__doc__r   rR   rT   r   datasetsr   r   huggingface_hubr   r   PILr   transformersr   r   trlr	   r
   r   r   r   r   r   r   r   r   r&   stranyr>   r^   r   __name__r$   r$   r$   r%   <module>   s"   #("Y
