o
    	Ti                     @   sn  d dl Z d dlZd dlmZ d dlmZ d dlmZmZm	Z	m
Z
 d dlmZmZmZmZmZmZmZ d dlmZ 	 edkr5e
eeefZe \ZZZe jejdd	 ejd
v r\ejneeejZeeZe ej!ej"eedurse ndedZ#e	j$ej%dej&dZ'e'(ddi e'j)du ree'_)ej$ej*ej&ddZ+ej$ej*ej&ddZ,ej$ej-ej&dZ.eeZ/e/du rej$ej-ej&dZ0ndZ0eej1ej2ej3dZ4dZ5e46e7e8e4e5 Z9e46e7e8e4e5 e8e4Z:dZ;dd Z<e =  e<e9e'Z9e<e:e'Z:W d   n	1 sw   Y  eee'e.e0e,e+e9e:e/d	Z>e>?  e>@ej ejAr/e>jAej1d e>B  dS dS )    N)PartialState)load_dataset)AutoModelForCausalLM"AutoModelForSequenceClassificationAutoTokenizerHfArgumentParser)ModelConfig	PPOConfig
PPOTrainerScriptArgumentsget_kbit_device_mapget_peft_configget_quantization_config)SIMPLE_CHAT_TEMPLATE__main__T)ignore_errors)autoN)revisionattn_implementationtorch_dtype
device_mapquantization_configleft)padding_sidetrust_remote_code	pad_tokenz[PAD]   )r   
num_labels)r   )namesplitd   promptc                    s"    fdd}| j |d| jtjdS )zFpre-tokenize the dataset before training; only collate during trainingc                    s    | t  dd}d|d iS )NF)padding	input_ids)dataset_text_field)elementoutputs	tokenizer L/home/ubuntu/.local/lib/python3.10/site-packages/examples/scripts/ppo/ppo.pytokenize   s
   z!prepare_dataset.<locals>.tokenizeT)batchedremove_columnsnum_proc)mapcolumn_namestraining_argsdataset_num_proc)datasetr(   r+   r)   r'   r*   prepare_dataset   s   r4   )	argsprocessing_classmodel	ref_modelreward_modelvalue_modeltrain_dataseteval_datasetpeft_config)dataset_name)Cshutiltorch
accelerater   datasetsr   transformersr   r   r   r   trlr   r	   r
   r   r   r   r   trl.trainer.utilsr   __name__parserparse_args_into_dataclassesscript_argsr1   
model_argsrmtree
output_dirr   getattrr   dictmodel_revisionr   model_kwargsfrom_pretrainedmodel_name_or_pathr   r(   add_special_tokenschat_templatereward_model_pathr:   r9   sft_model_pathpolicyr=   
ref_policyr>   dataset_configdataset_train_splitr3   eval_samplesselectrangelenr;   r<   r$   r4   local_main_process_firsttrainertrain
save_modelpush_to_hubgenerate_completionsr)   r)   r)   r*   <module>   s   $	





