o
    	Ti7                     @   sx   d dl mZmZ d dlmZ d dlmZ eG dd dZdd Ze	dkr:eeZ
e
 d  Zeejejej d	S d	S )
    )	dataclassfield)Dataset)HfArgumentParserc                   @   sZ   e Zd ZU dZedddidZeed< edddidZe	ed	< ed
ddidZ
eed< dS )ScriptArgumentsa  
    Arguments for the script.

    Args:
        test_size (`float`, *optional*, defaults to `0.1`):
            Fraction of the dataset to include in the test split.
        push_to_hub (`bool`, *optional*, defaults to `False`):
            Whether to push the dataset to the Hugging Face Hub.
        repo_id (`str`, *optional*, defaults to `"trl-internal-testing/harmony"`):
            Hugging Face repository ID to push the dataset to.
    g?helpz5Fraction of the dataset to include in the test split.)defaultmetadata	test_sizeFz4Whether to push the dataset to the Hugging Face Hub.push_to_hubztrl-internal-testing/harmonyz2Hugging Face repository ID to push the dataset to.repo_idN)__name__
__module____qualname____doc__r   r
   float__annotations__r   boolr   str r   r   T/home/ubuntu/.local/lib/python3.10/site-packages/scripts/generate_harmony_dataset.pyr      s   
 r   c                 C   s  t dddddddgddddd	d
dgdddddddgdddddddgdddddddgdddddddgdddddddgdddddddgdddddddgdd ddd!d"dgdd#ddd$d%dgdd&ddd'd(dgdd)ddd*d+dgdd,ddd-d.dgdd/ddd0d1dgdd2ddd3d4dgdd5ddd6d7dgdd8ddd9d:dgdd;ddd<d=dggd>d?d@dAd?d@dBd?d@d>d?d@dAd?d@dBd?d@d>d?d@dAd?d@dBd?d@d>d?d@dAd?d@dBd?d@d>d?d@dAd?d@dBd?d@d>d?d@dAd?d@dBd?d@d>d?d@gdC}|j| dDdE}|r#|j|dFdG ||dH  t dddgdddgdddgdddgdddgdddgdddgdddgdddgdd dgdd#dgdd&dgdd)dgdd,dgdd/dgdd2dgdd5dgdd8dgdd;dggddddgdd	d
dgddddgddddgddddgddddgddddgddddgddddgdd!d"dgdd$d%dgdd'd(dgdd*d+dgdd-d.dgdd0d1dgdd3d4dgdd6d7dgdd9d:dgdd<d=dggd>d?d@dAd?d@dBd?d@d>d?d@dAd?d@dBd?d@d>d?d@dAd?d@dBd?d@d>d?d@dAd?d@dBd?d@d>d?d@dAd?d@dBd?d@d>d?d@dAd?d@dBd?d@d>d?d@gdI}|j| dDdE}|ra|j|dJdG ||dK  d S )LNuserzWhat is better than ugly?)rolecontent	assistantz#Beauty improves clarity and appeal.z
Beautiful.)r   thinkingr   zWhat is better than implicit?zClarity avoids confusion.z	Explicit.zWhat is better than complex?zSimplicity is easier to manage.zSimple.z What is better than complicated?u1   Complexity has structure; complication doesn’t.zComplex.zWhat is better than nested?z#Flat structures are easier to read.zFlat.zWhat is better than dense?zSpacing aids understanding.zSparse.zWhat counts?zReadable code lasts longer.zReadability.z,Are special cases enough to break the rules?zConsistency is more valuable.z;No, special cases aren't special enough to break the rules.zWhat beats purity?z!Real-world needs outweigh ideals.zPracticality.z What should never pass silently?z"Silent errors cause hidden issues.zErrors.zWhen can errors pass silently?zSilence must be intentional.zWhen explicitly silenced.z,What should you do in the face of ambiguity?z Guessing leads to wrong choices.zRefuse the temptation to guess.z'How many ways should there be to do it?zOne way reduces confusion.zOne, and preferably only one.z-For whom may the way not be obvious at first?u$   A playful nod to Python’s creator.zDutch.zWhat is better than never?z$Action is better than endless delay.zNow is better than never.z!Is never better than *right* now?zRushed action can be worse.zYes, often.z;What does it mean if the implementation is hard to explain?zGood ideas should be clear.zIt means it's a bad idea.z;What does it mean if the implementation is easy to explain?zClarity suggests soundness.zIt means it may be a good idea.zAny great ideas?zNamespaces prevent conflicts.z&Namespaces are one honking great idea.lowz,You are Tiny ChatGPT, a tiny language model.)reasoning_effortmodel_identitymediumhigh)messageschat_template_kwargsF)r
   shufflelanguage_modeling)config_namez/language_modeling)prompt
completionr"   prompt_completionz/prompt_completion)r   	from_dicttrain_test_splitr   save_to_disk)r
   r   r   language_modeling_datasetprompt_completion_datasetr   r   r   main1   s   ,


















Ar.   __main__N)dataclassesr   r   datasetsr   transformersr   r   r.   r   parserparse_args_into_dataclassesscript_argsr
   r   r   r   r   r   r   <module>   s   z