o
    	Ti                     @   s   d Z ddlZddlZddlmZ ddlmZmZ ddlm	Z	m
Z
mZmZmZmZmZmZ ddlmZ dd Zdd	ejfd
dZedkrUe Ze \ZZZeeee dS dS )a  
# Full training
```bash
python trl/scripts/dpo.py     --dataset_name trl-lib/ultrafeedback_binarized     --dataset_streaming     --model_name_or_path Qwen/Qwen2-0.5B-Instruct     --learning_rate 5.0e-7     --num_train_epochs 1     --per_device_train_batch_size 2     --gradient_accumulation_steps 8     --gradient_checkpointing     --eval_strategy steps     --eval_steps 50     --output_dir Qwen2-0.5B-DPO     --no_remove_unused_columns
    --report_to wandb
```

# LoRA:
```bash
python trl/scripts/dpo.py     --dataset_name trl-lib/ultrafeedback_binarized     --dataset_streaming     --model_name_or_path Qwen/Qwen2-0.5B-Instruct     --learning_rate 5.0e-6     --num_train_epochs 1     --per_device_train_batch_size 2     --gradient_accumulation_steps 8     --gradient_checkpointing     --eval_strategy steps     --eval_steps 50     --output_dir Qwen2-0.5B-DPO     --no_remove_unused_columns     --use_peft     --lora_r 32     --lora_alpha 16
    --report_to wandb
```
    N)load_dataset)AutoModelForCausalLMAutoTokenizer)	DPOConfig
DPOTrainerModelConfigScriptArguments	TrlParserget_kbit_device_mapget_peft_configget_quantization_config)SIMPLE_CHAT_TEMPLATEc              	   C   s  |j dv r|j ntt|j }t|}t|j|j||jrdnd|d ur%t nd |d}t	j
|jfd|ji|}t|}|d u rMt	j
|jfd|ji|}nd }tj
|j|jd}	|	jd u ra|	j|	_|	jd u rit|	_| jrvdd | D |_t| j| j| jd	}
t||||
| j |jd
kr|
| j nd |	|d}|  |jd
kr| }| d| |!d| |"|j# |j$r|j$| jd d S d S )N)autoNFT)revisionattn_implementationtorch_dtype	use_cache
device_mapquantization_configtrust_remote_code)r   c                 S   s    g | ]\}}|j tjkr|qS  )dtypetorchbool).0namebufferr   r   C/home/ubuntu/.local/lib/python3.10/site-packages/trl/scripts/dpo.py
<listcomp>u   s    zmain.<locals>.<listcomp>)r   	streamingno)argstrain_dataseteval_datasetprocessing_classpeft_configeval)dataset_name)%r   getattrr   r   dictmodel_revisionr   gradient_checkpointingr
   r   from_pretrainedmodel_name_or_pathr   r   r   	pad_token	eos_tokenchat_templater   ignore_bias_buffersnamed_buffers!_ddp_params_and_buffers_to_ignorer   r'   dataset_configdataset_streamingr   dataset_train_spliteval_strategydataset_test_splittrainevaluatelog_metricssave_metrics
save_model
output_dirpush_to_hub)script_argstraining_args
model_argsr   r   model_kwargsmodelr%   	ref_model	tokenizerdatasettrainermetricsr   r   r   mainR   s|   

	

rJ   
subparsersc                 C   s2   t ttf}| d ur| jdd|d}|S t|}|S )NdpozRun the DPO training script)helpdataclass_types)r   r   r   
add_parserr	   )rK   rN   parserr   r   r   make_parser   s   
rQ   __main__)N)__doc__argparser   datasetsr   transformersr   r   trlr   r   r   r   r	   r
   r   r   trl.trainer.utilsr   rJ   _SubParsersActionrQ   __name__rP   parse_args_and_configr@   rA   rB   r   r   r   r   <module>   s   )(
J	