"""
Run the ORPO training script with the following command with some example arguments.
In general, the optimal configuration for ORPO will be similar to that of DPO without the need for a reference model:

# regular:
python examples/scripts/orpo.py \
    --dataset_name trl-internal-testing/hh-rlhf-helpful-base-trl-style \
    --model_name_or_path=gpt2 \
    --per_device_train_batch_size 4 \
    --max_steps 1000 \
    --learning_rate 8e-6 \
    --gradient_accumulation_steps 1 \
    --eval_steps 500 \
    --output_dir="gpt2-aligned-orpo" \
    --warmup_steps 150 \
    --report_to wandb \
    --bf16 \
    --logging_first_step \
    --no_remove_unused_columns

# peft:
python examples/scripts/orpo.py \
    --dataset_name trl-internal-testing/hh-rlhf-helpful-base-trl-style \
    --model_name_or_path=gpt2 \
    --per_device_train_batch_size 4 \
    --max_steps 1000 \
    --learning_rate 8e-5 \
    --gradient_accumulation_steps 1 \
    --eval_steps 500 \
    --output_dir="gpt2-lora-aligned-orpo" \
    --optim rmsprop \
    --warmup_steps 150 \
    --report_to wandb \
    --bf16 \
    --logging_first_step \
    --no_remove_unused_columns \
    --use_peft \
    --lora_r=16 \
    --lora_alpha=16
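
# inference sanity check (a minimal sketch, not part of this script: it assumes the
# "gpt2-aligned-orpo" output_dir from the regular run above and that the tokenizer,
# including its chat template, was saved alongside the model):
python -c '
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2-aligned-orpo")
model = AutoModelForCausalLM.from_pretrained("gpt2-aligned-orpo")
# Format a single-turn conversation with the tokenizer chat template
prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "How do I stay safe online?"}],
    tokenize=False,
    add_generation_prompt=True,
)
inputs = tokenizer(prompt, return_tensors="pt")
output = model.generate(**inputs, max_new_tokens=64, pad_token_id=tokenizer.pad_token_id)
print(tokenizer.decode(output[0], skip_special_tokens=True))
'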
"""

from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, HfArgumentParser

from trl import ModelConfig, ORPOConfig, ORPOTrainer, ScriptArguments, get_peft_config
from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE


if __name__ == "__main__":
    parser = HfArgumentParser((ScriptArguments, ORPOConfig, ModelConfig))
    script_args, training_args, model_args = parser.parse_args_into_dataclasses()

    ################
    # Model & Tokenizer
    ################
    model = AutoModelForCausalLM.from_pretrained(
        model_args.model_name_or_path, trust_remote_code=model_args.trust_remote_code
    )
    tokenizer = AutoTokenizer.from_pretrained(
        model_args.model_name_or_path, trust_remote_code=model_args.trust_remote_code
    )
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    ################
    # Dataset
    ################
    dataset = load_dataset(script_args.dataset_name, name=script_args.dataset_config)
    if tokenizer.chat_template is None:
        tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE

    ################
    # Training
    ################
    trainer = ORPOTrainer(
        model,
        args=training_args,
        train_dataset=dataset[script_args.dataset_train_split],
        eval_dataset=dataset[script_args.dataset_test_split] if training_args.eval_strategy != "no" else None,
        processing_class=tokenizer,
        peft_config=get_peft_config(model_args),
    )

    # Train, then save (and optionally push) the final model
    trainer.train()
    trainer.save_model(training_args.output_dir)
    if training_args.push_to_hub:
        trainer.push_to_hub(dataset_name=script_args.dataset_name)