o
    	Tin                     @   s  d Z ddlZddlZddlmZ ddlmZmZmZ ddl	m
Z
mZmZmZmZmZmZ edkreeee
fZe \ZZZedde_ejd	v rLejneeejZeeZeejedur`e ndeejrgdnd
dZejej ej!d
dZ"ejej fdej!deZ#e"j$e#j%_$ej&rej'dkre(de) eej*ej+dZ,e,-dd Z,ee#e"ee,ej. e,ej/ eedZ0e01  e02ej3 e04 Z5e06de5 e07de5 e02ej3 ej8re0j8ej*d dS dS dS )a  
Full training:
python examples/scripts/prm.py     --model_name_or_path Qwen/Qwen2-0.5B-Instruct     --dataset_name trl-lib/prm800k     --output_dir Qwen2-0.5B-Reward     --per_device_train_batch_size 8     --num_train_epochs 1     --gradient_checkpointing True     --learning_rate 1.0e-5     --eval_strategy steps     --eval_steps 50

LoRA:
python examples/scripts/prm.py     --model_name_or_path Qwen/Qwen2-0.5B-Instruct     --dataset_name trl-lib/prm800k     --output_dir Qwen2-0.5B-Reward-LoRA     --per_device_train_batch_size 8     --num_train_epochs 1     --gradient_checkpointing True     --learning_rate 1.0e-4     --eval_strategy steps     --eval_steps 50
    --use_peft     --lora_r 32     --lora_alpha 16
    N)load_dataset)AutoModelForTokenClassificationAutoTokenizerHfArgumentParser)ModelConfig	PRMConfig
PRMTrainerScriptArgumentsget_kbit_device_mapget_peft_configget_quantization_config__main__F)use_reentrant)autoNT)revision
device_mapquantization_config	use_cache)trust_remote_codeuse_fast   )
num_labelsr   	TOKEN_CLSzYou are using a `task_type` that is different than `TOKEN_CLS` for PEFT. This will lead to silent bugs Make sure to pass --lora_task_type TOKEN_CLS when using this script with PEFT.)namec                 C   s   t | d dkS )Ncompletionsr   )len)x r   H/home/ubuntu/.local/lib/python3.10/site-packages/examples/scripts/prm.py<lambda>l   s    r   )modelprocessing_classargstrain_dataseteval_datasetpeft_configeval)dataset_name)9__doc__warningstorchdatasetsr   transformersr   r   r   trlr   r   r   r	   r
   r   r   __name__parserparse_args_into_dataclassesscript_argstraining_argsmodel_configdictgradient_checkpointing_kwargstorch_dtypegetattrr   model_revisiongradient_checkpointingmodel_kwargsfrom_pretrainedmodel_name_or_pathr   	tokenizerr    pad_token_idconfiguse_peftlora_task_typewarnUserWarningr'   dataset_configdatasetfilterdataset_train_splitdataset_test_splittrainertrain
save_model
output_direvaluatemetricslog_metricssave_metricspush_to_hubr   r   r   r   <module>   sr   $



	B