o
    	Ti                     @   s   d dl mZmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
 eG dd dZdd	 Zd
d ZedZedkrse
eZe d  ZedddZejeg dejdZeeZejdddZejrueej ejejdd dS dS dS )    )	dataclassfield)Optional)load_dataset)	ModelCard)HfArgumentParserc                   @   s^   e Zd ZU dZedddidZeed< edddidZe	ed	< ed
ddidZ
ee ed< d
S )ScriptArgumentsa  
    Arguments for the script.

    Args:
        push_to_hub (`bool`, *optional*, defaults to `False`):
            Whether to push the dataset to the Hugging Face Hub.
        repo_id (`str`, *optional*, defaults to `"trl-lib/ultrafeedback-prompt"`):
            Hugging Face repository ID to push the dataset to.
        dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
            Number of workers to use for dataset processing.
    Fhelpz4Whether to push the dataset to the Hugging Face Hub.)defaultmetadatapush_to_hubztrl-lib/ultrafeedback-promptz2Hugging Face repository ID to push the dataset to.repo_idNz0Number of workers to use for dataset processing.dataset_num_proc)__name__
__module____qualname____doc__r   r   bool__annotations__r   strr   r   int r   r   Z/home/ubuntu/.local/lib/python3.10/site-packages/examples/datasets/ultrafeedback-prompt.pyr      s   
 r   c                 C   s   d| d dg}d|iS )Nuserinstruction)rolecontentpromptr   )exampler   r   r   r   to_unpaired_preference3   s   r   c                 C   s    t | d d d dkrdS dS )Nr   r   r   i   FT)len)r   r   r   r   drop_long_prompt8   s   r!   a  
---
tags: [trl]
---

# UltraFeedback - Prompts Dataset

## Summary

The UltraFeedback - Prompts dataset is a processed version of the [UltraFeedback](https://huggingface.co/datasets/openbmb/UltraFeedback) dataset for model evaluation on specific aspects like helpfulness, honesty, and instruction-following.

## Data Structure

- **Format**: [Conversational](https://huggingface.co/docs/trl/main/dataset_formats#conversational)
- **Type**: [Prompt-only](https://huggingface.co/docs/trl/main/dataset_formats#prompt-only)

Column:
- `"prompt"`: The input question or instruction provided to the model.

## Generation script

The script used to generate this dataset can be found [here](https://github.com/huggingface/trl/blob/main/examples/datasets/ultrafeedback-prompt.py).
__main__zopenbmb/UltraFeedbacktrain)split)sourcer   modelscompletionscorrect_answersincorrect_answers)remove_columnsnum_procg?*   )	test_sizeseeddataset)	repo_typeN)dataclassesr   r   typingr   datasetsr   huggingface_hubr   transformersr   r   r   r!   
model_cardr   parserparse_args_into_dataclassesscript_argsr/   mapr   filtertrain_test_splitr   r   r   r   r   r   <module>   s4   
