"""
Full training:
python examples/scripts/reward_modeling.py \
    --model_name_or_path Qwen/Qwen2-0.5B-Instruct \
    --dataset_name trl-lib/ultrafeedback_binarized \
    --output_dir Qwen2-0.5B-Reward \
    --per_device_train_batch_size 8 \
    --num_train_epochs 1 \
    --gradient_checkpointing True \
    --learning_rate 1.0e-5 \
    --eval_strategy steps \
    --eval_steps 50 \
    --max_length 2048

LoRA:
python examples/scripts/reward_modeling.py \
    --model_name_or_path Qwen/Qwen2-0.5B-Instruct \
    --dataset_name trl-lib/ultrafeedback_binarized \
    --output_dir Qwen2-0.5B-Reward-LoRA \
    --per_device_train_batch_size 8 \
    --num_train_epochs 1 \
    --gradient_checkpointing True \
    --learning_rate 1.0e-4 \
    --eval_strategy steps \
    --eval_steps 50 \
    --max_length 2048 \
    --use_peft \
    --lora_r 32 \
    --lora_alpha 16
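
Scoring with the trained reward model (a minimal sketch, not part of this script; the
checkpoint path is whatever --output_dir was used above, and the messages are placeholders):

    import torch
    from transformers import AutoModelForSequenceClassification, AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("Qwen2-0.5B-Reward")
    model = AutoModelForSequenceClassification.from_pretrained("Qwen2-0.5B-Reward", num_labels=1)
    messages = [
        {"role": "user", "content": "What is the capital of France?"},
        {"role": "assistant", "content": "Paris."},
    ]
    inputs = tokenizer.apply_chat_template(messages, return_tensors="pt", return_dict=True)
    with torch.no_grad():
        reward = model(**inputs).logits[0].item()  # scalar reward for the conversation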
    N)load_dataset)"AutoModelForSequenceClassificationAutoTokenizerHfArgumentParser)ModelConfigRewardConfigRewardTrainerScriptArgumentsget_kbit_device_mapget_peft_configget_quantization_configsetup_chat_format__main__F)use_reentrant)autoNT)revision
device_mapquantization_config	use_cachetorch_dtype)trust_remote_codeuse_fast   )
num_labelsr   SEQ_CLSzYou are using a `task_type` that is different than `SEQ_CLS` for PEFT. This will lead to silent bugs Make sure to pass --lora_task_type SEQ_CLS when using this script with PEFT.)nameno)modelprocessing_classargstrain_dataseteval_datasetpeft_configeval)dataset_name);__doc__warningstorchdatasetsr   transformersr   r   r   trlr   r   r   r	   r
   r   r   r   __name__parserparse_args_into_dataclassesscript_argstraining_args
model_argsdictgradient_checkpointing_kwargsr   getattrr   model_revisiongradient_checkpointingmodel_kwargsfrom_pretrainedmodel_name_or_pathr   	tokenizerr   pad_token_idconfigchat_templateuse_peftlora_task_typewarnUserWarningr$   dataset_configdatasetdataset_train_spliteval_strategydataset_test_splittrainertrain
save_model
output_direvaluatemetricslog_metricssave_metricspush_to_hub rO   rO   T/home/ubuntu/.local/lib/python3.10/site-packages/examples/scripts/reward_modeling.py<module>   st   (


	
E