o
    	Tia                     @   s   d Z ddlZddlmZ ddlmZmZ ddlmZm	Z	m
Z
mZmZmZmZ dd Zddejfd	d
ZedkrIe Ze \ZZZeeee dS dS )aF  
Run the KTO training script with the commands below. In general, the optimal configuration for KTO will be similar to
that of DPO.

# Full training:
```bash
python trl/scripts/kto.py     --dataset_name trl-lib/kto-mix-14k     --model_name_or_path=trl-lib/qwen1.5-1.8b-sft     --per_device_train_batch_size 16     --num_train_epochs 1     --learning_rate 5e-7     --lr_scheduler_type=cosine     --gradient_accumulation_steps 1     --eval_steps 500     --output_dir=kto-aligned-model     --warmup_ratio 0.1     --report_to wandb     --logging_first_step
```

# QLoRA:
```bash
# QLoRA:
python trl/scripts/kto.py     --dataset_name trl-lib/kto-mix-14k     --model_name_or_path=trl-lib/qwen1.5-1.8b-sft     --per_device_train_batch_size 8     --num_train_epochs 1     --learning_rate 5e-7     --lr_scheduler_type=cosine     --gradient_accumulation_steps 1     --eval_steps 500     --output_dir=kto-aligned-model-lora     --warmup_ratio 0.1     --report_to wandb     --logging_first_step     --use_peft     --load_in_4bit     --lora_target_modules=all-linear     --lora_r=16     --lora_alpha=16
```
    N)load_dataset)AutoModelForCausalLMAutoTokenizer)	KTOConfig
KTOTrainerModelConfigScriptArguments	TrlParserget_peft_configsetup_chat_formatc              	   C   s   t j|j|jd}t j|j|jd}tj|j|jd}|jd u r$|j|_|jd u r0t||\}}t	| j
| jd}t||||| j |jdkrJ|| j nd |t|d}|  ||j |jrh|j| j
d d S d S )N)trust_remote_code)nameno)argstrain_dataseteval_datasetprocessing_classpeft_config)dataset_name)r   from_pretrainedmodel_name_or_pathr   r   	pad_token	eos_tokenchat_templater   r   r   dataset_configr   dataset_train_spliteval_strategydataset_test_splitr
   train
save_model
output_dirpush_to_hub)script_argstraining_args
model_argsmodel	ref_model	tokenizerdatasettrainer r*   C/home/ubuntu/.local/lib/python3.10/site-packages/trl/scripts/kto.pymainS   s8   

r,   
subparsersc                 C   s2   t ttf}| d ur| jdd|d}|S t|}|S )NktozRun the KTO training script)helpdataclass_types)r   r   r   
add_parserr	   )r-   r0   parserr*   r*   r+   make_parser}   s   
r3   __main__)N)__doc__argparsedatasetsr   transformersr   r   trlr   r   r   r   r	   r
   r   r,   _SubParsersActionr3   __name__r2   parse_args_and_configr"   r#   r$   r*   r*   r*   r+   <module>   s   -$*	