o
    	Ti                  	   @   s  d Z ddlZddlmZ ddlmZmZmZ ddlm	Z	m
Z
mZmZmZmZmZmZ edkree
ee	fZe \ZZZedde_de_d	d
ie_ejdv rRejneeejZeeZeejejeedurie ndedZ ej!ej"ej#dZ$ej!ej"fdej#ie Z%dd Z&eej'ej(dZ)ee%ee&e)ej* ej+dkre)ej, nde$eedZ-e-.  e-/ej0 ej1re-j1ej'd e-j2j3re$1ej4 dS dS dS dS )a  
pip install pillow

# Tested on 8x H100 GPUs
accelerate launch
    --config_file=examples/accelerate_configs/deepspeed_zero3.yaml     examples/scripts/sft_vlm.py     --dataset_name HuggingFaceH4/llava-instruct-mix-vsft     --model_name_or_path llava-hf/llava-1.5-7b-hf     --per_device_train_batch_size 8     --gradient_accumulation_steps 8     --output_dir sft-llava-1.5-7b-hf     --bf16     --torch_dtype bfloat16     --gradient_checkpointing

For LLaVA-NeXT, use: (requires transformers>=4.45)
    --model_name_or_path llava-hf/llava-v1.6-mistral-7b-hf

For meta-llama/Llama-3.2-11B-Vision-Instruct, use: (requires transformers>=4.45.1)
    --model_name_or_path meta-llama/Llama-3.2-11B-Vision-Instruct
    N)load_dataset)AutoModelForVision2SeqAutoProcessorLlavaForConditionalGeneration)ModelConfigScriptArguments	SFTConfig
SFTTrainer	TrlParserget_kbit_device_mapget_peft_configget_quantization_config__main__F)use_reentrantskip_prepare_datasetT)autoN)revisionattn_implementationtorch_dtype
device_mapquantization_config)trust_remote_coder   c                 C   s   dd | D }dd | D }t ttrdd |D }t||ddd}|d  }d	||tjjk< tjtj}d	|||k< ||d
< |S )Nc                 S   s   g | ]}t j|d  ddqS )messagesF)tokenize)	processorapply_chat_template.0example r   L/home/ubuntu/.local/lib/python3.10/site-packages/examples/scripts/sft_vlm.py
<listcomp>_   s    zcollate_fn.<locals>.<listcomp>c                 S      g | ]}|d  qS )imagesr   r   r   r   r    r!   `       c                 S   r"   )r   r   )r   imager   r   r    r!   c   r$   ptT)r#   textreturn_tensorspadding	input_idsilabels)	
isinstancemodelr   r   clone	tokenizerpad_token_idconvert_tokens_to_idsimage_token)examplestextsr#   batchr+   image_token_idr   r   r    
collate_fn]   s   
r7   )nameno)r-   argsdata_collatortrain_dataseteval_datasetprocessing_classpeft_config)dataset_name)5__doc__torchdatasetsr   transformersr   r   r   trlr   r   r   r	   r
   r   r   r   __name__parserparse_args_and_configscript_argstraining_args
model_argsdictgradient_checkpointing_kwargsremove_unused_columnsdataset_kwargsr   getattrr   model_revisionr   model_kwargsfrom_pretrainedmodel_name_or_pathr   r   r-   r7   r@   dataset_configdatasetdataset_train_spliteval_strategydataset_test_splittrainertrain
save_model
output_dirpush_to_hubacceleratoris_main_processhub_model_idr   r   r   r    <module>   sf   (

K