o
    	Ti                     @   s   d dl mZmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
 eG dd dZdd	 Zd
d ZedZedkrne
eZe d  ZdddZededZejeddg dejdZejrpeej ejejdd dS dS dS )    )	dataclassfield)Optional)load_dataset)	ModelCard)HfArgumentParserc                   @   s^   e Zd ZU dZedddidZeed< edddidZe	ed	< ed
ddidZ
ee ed< d
S )ScriptArgumentsa  
    Arguments for the script.

    Args:
        push_to_hub (`bool`, *optional*, defaults to `False`):
            Whether to push the dataset to the Hugging Face Hub.
        repo_id (`str`, *optional*, defaults to `"trl-lib/prm800k"`):
            Hugging Face repository ID to push the dataset to.
        dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
            Number of workers to use for dataset processing.
    Fhelpz4Whether to push the dataset to the Hugging Face Hub.)defaultmetadatapush_to_hubztrl-lib/prm800kz2Hugging Face repository ID to push the dataset to.repo_idNz0Number of workers to use for dataset processing.dataset_num_proc)__name__
__module____qualname____doc__r   r   bool__annotations__r   strr   r   int r   r   M/home/ubuntu/.local/lib/python3.10/site-packages/examples/datasets/prm800k.pyr      s   
 r   c                 C   s8  g }| d d }g }g }| d d D ]~}|d d u r(|d d u r(|d d u r( nit |d D ]/\}}||d kr]|d }|d d  |g }	|d	 d
k}
|d d  |
g }|||	|d q.|d d urs|d |d  }|d	 d
k}
n|d d ur|d }d}
n n|d }|| ||
 q||||d |S )Nquestionproblemlabelstepscompletionshuman_completionchosen_completiontextrating   )promptr   labelsT)	enumerateappend)exampleoutputsr#   previous_completionsprevious_labelsstepcompletion_idx
completioncontentr   r   r$   r   r   r   r   process_example3   s6   $
r/   c                    s\   g t | d }t|D ]  fdd|  D }t| qfddd D S )Nr   c                    s   i | ]	\}}||  qS r   r   ).0kv)idxr   r   
<dictcomp>^   s    z!process_batch.<locals>.<dictcomp>c                    s    i | ]   fd dD qS )c                    s   g | ]}|  qS r   r   )r0   r2   r1   r   r   
<listcomp>a   s    z,process_batch.<locals>.<dictcomp>.<listcomp>r   )r0   )r(   r5   r   r4   a   s     r   )lenrangeitemsextendr/   )examples
batch_sizer'   r   )r3   r(   r   process_batchZ   s   r=   a  
---
tags: [trl]
---

# PRM800K Dataset

## Summary

The PRM800K dataset is a processed version of [OpenAI's PRM800K](https://github.com/openai/prm800k), designed to train models using the [TRL library](https://github.com/huggingface/trl) for stepwise supervision tasks. It contains 800,000 step-level correctness labels for model-generated solutions to problems from the MATH dataset. This dataset enables models to learn and verify each step of a solution, enhancing their reasoning capabilities.

## Data Structure

- **Format**: [Standard](https://huggingface.co/docs/trl/main/dataset_formats#standard)
- **Type**: [Stepwise supervision](https://huggingface.co/docs/trl/main/dataset_formats#stepwise-supervision)

Columns:
- `"prompt"`: The problem statement.
- `"completions"`: A list of reasoning steps generated to solve the problem.
- `"labels"`: A list of booleans or floats indicating the correctness of each corresponding reasoning step.

This structure allows models to learn the correctness of each step in a solution, facilitating improved reasoning and problem-solving abilities.

## Generation script

The script used to generate this dataset can be found [here](https://github.com/huggingface/trl/blob/main/examples/datasets/prm800k.py).
__main__zUhttps://github.com/openai/prm800k/raw/refs/heads/main/prm800k/data/phase1_train.jsonlzThttps://github.com/openai/prm800k/raw/refs/heads/main/prm800k/data/phase1_test.jsonl)traintestjson)
data_filesT
   )labeler	timestamp
generationis_quality_control_questionis_initial_screening_questionr   r   )batchedr<   remove_columnsnum_procdataset)	repo_typeN)dataclassesr   r   typingr   datasetsr   huggingface_hubr   transformersr   r   r/   r=   
model_cardr   parserparse_args_into_dataclassesscript_argsrB   rL   mapr   r   r   r   r   r   r   <module>   s:   '	