o
    	Ti"                     @   s  d dl mZmZ d dlmZ d dlmZmZ d dlm	Z	 d dl
mZ eG dd dZdd	 Ze	d
ZedkreeZe d  ZedddZejeejejddZejZeejdded< eeZejdddZejreej ejejdd dS dS dS )    )	dataclassfield)Optional)featuresload_dataset)	ModelCard)HfArgumentParserc                   @   s^   e Zd ZU dZedddidZeed< edddidZe	ed	< ed
ddidZ
ee ed< d
S )ScriptArgumentsa  
    Arguments for the script.

    Args:
        push_to_hub (`bool`, *optional*, defaults to `False`):
            Whether to push the dataset to the Hugging Face Hub.
        repo_id (`str`, *optional*, defaults to `"trl-lib/rlaif-v"`):
            Hugging Face repository ID to push the dataset to.
        dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
            Number of workers to use for dataset processing.
    Fhelpz4Whether to push the dataset to the Hugging Face Hub.)defaultmetadatapush_to_hubztrl-lib/rlaif-vz2Hugging Face repository ID to push the dataset to.repo_idNz0Number of workers to use for dataset processing.dataset_num_proc)__name__
__module____qualname____doc__r   r   bool__annotations__r   strr   r   int r   r   M/home/ubuntu/.local/lib/python3.10/site-packages/examples/datasets/rlaif-v.pyr	      s   
 r	   c                 C   sb   dddid| d dgdg}dd| d	 dgdg}dd| d
 dgdg}|| d g||dS )a  
    Convert prompt from "xxx" to [{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": "xxx"}]}]
    and chosen and rejected from "xxx" to [{"role": "assistant", "content": [{"type": "text", "text": "xxx"}]}].
    Images are wrapped into a list.
    usertypeimagetextquestion)r   r   )rolecontent	assistantchosenrejected)promptimagesr"   r#   r   )exampler$   r"   r#   r   r   r   to_conversational3   s   r'   a>  
---
tags: [trl]
---

# RLAIF-V Dataset

## Summary

The RLAIF-V dataset is a processed version of the [openbmb/RLAIF-V-Dataset](https://huggingface.co/datasets/openbmb/RLAIF-V-Dataset#dataset-card-for-rlaif-v-dataset), specifically curated to train vision-language models using the [TRL library](https://github.com/huggingface/trl) for preference learning tasks. It contains 83,132 high-quality comparison pairs, each comprising an image and two textual descriptions: one preferred and one rejected. This dataset enables models to learn human preferences in visual contexts, enhancing their ability to generate and evaluate image captions.

## Data Structure

- **Format**: [Conversational](https://huggingface.co/docs/trl/main/dataset_formats#conversational)
- **Type**: [Preference](https://huggingface.co/docs/trl/main/dataset_formats#preference)

Columns:
- `"prompt"`: The task related to the image.
- `"images"`: The image.
- `"chosen"`: The preferred answer.
- `"rejected"`: An alternative answer that was not preferred.

This structure allows models to learn to prefer the _chosen_ response over the _rejected_ one, thereby aligning with human preferences in visual tasks.

## Generation script

The script used to generate this dataset can be found [here](https://github.com/huggingface/trl/blob/main/examples/datasets/rlaif-v.py).
__main__zopenbmb/RLAIF-V-Datasettrain)split   )num_procremove_columnswriter_batch_sizeT)decoder%   g{Gz?)	test_sizer.   dataset)	repo_typeN)dataclassesr   r   typingr   datasetsr   r   huggingface_hubr   transformersr   r	   r'   
model_cardr   parserparse_args_into_dataclassesscript_argsr1   mapr   column_namesfSequenceImagecasttrain_test_splitr   r   r   r   r   r   <module>   s8   
