o
    	Ti                     @   s,  d Z ddlmZ ddlmZmZmZ ddlmZm	Z	m
Z
mZmZ ddlmZ edkreeee
fZe \ZZZejejejdZejejejdZejdu rSeje_eejejd	Zejdu rcee_e	eeeej  ej!d
krteej" ndeeedZ#e#$  e#%ej& ej're#j'ejd dS dS dS )a"  
Run the CPO training script with the following command with some example arguments.
In general, the optimal configuration for CPO will be similar to that of DPO:

# regular:
python examples/scripts/cpo.py     --dataset_name trl-lib/ultrafeedback_binarized     --model_name_or_path=gpt2     --per_device_train_batch_size 4     --max_steps 1000     --learning_rate 8e-6     --gradient_accumulation_steps 1     --eval_steps 500     --output_dir="gpt2-aligned-cpo"     --warmup_steps 150     --report_to wandb     --bf16     --logging_first_step     --no_remove_unused_columns

# peft:
python examples/scripts/cpo.py     --dataset_name trl-lib/ultrafeedback_binarized     --model_name_or_path=gpt2     --per_device_train_batch_size 4     --max_steps 1000     --learning_rate 8e-5     --gradient_accumulation_steps 1     --eval_steps 500     --output_dir="gpt2-lora-aligned-cpo"     --optim rmsprop     --warmup_steps 150     --report_to wandb     --bf16     --logging_first_step     --no_remove_unused_columns     --use_peft     --lora_r=16     --lora_alpha=16
    )load_dataset)AutoModelForCausalLMAutoTokenizerHfArgumentParser)	CPOConfig
CPOTrainerModelConfigScriptArgumentsget_peft_config)SIMPLE_CHAT_TEMPLATE__main__)trust_remote_codeN)nameno)argstrain_dataseteval_datasetprocessing_classpeft_config)dataset_name)(__doc__datasetsr   transformersr   r   r   trlr   r   r   r	   r
   trl.trainer.utilsr   __name__parserparse_args_into_dataclassesscript_argstraining_args
model_argsfrom_pretrainedmodel_name_or_pathr   model	tokenizer	pad_token	eos_tokenr   dataset_configdatasetchat_templatedataset_train_spliteval_strategydataset_test_splittrainertrain
save_model
output_dirpush_to_hub r2   r2   H/home/ubuntu/.local/lib/python3.10/site-packages/examples/scripts/cpo.py<module>   sB   )


(