o
    	Ti                     @   s  d dl Z d dlZd dlmZ d dlmZ d dlmZmZm	Z	m
Z
 d dlmZmZmZmZmZmZmZ d dlmZ 	 edkrTe
eeefZe \ZZZe jejdd	 ejd
v r\ejneeejZeeZe ej!ej"eedurse ndedZ#e	j$ej%dej&dZ'e'(ddi e'j)du ree'_)ej$ej*ej&ddZ+ej$ej*ej&ddZ,ej$ej-ej&dZ.eeZ/e/du rej$ej-ej&dZ0ndZ0eej1ej2dZ3e3ej4 Z5ej6dkre3ej7 ndZ8dd Z9e : / e9e5e'Z5e8dure9e8e'Z8e5j;dd ej<dZ5e8dure8j;dd ej<dZ8W d   n	1 sw   Y  e5d  d d e'j=ks,J deee'e.e0e,e+e5e8e/d	Z>e>?  e>@ej ejArNe>jAej1d e>B  dS dS )     N)PartialState)load_dataset)AutoModelForCausalLM"AutoModelForSequenceClassificationAutoTokenizerHfArgumentParser)ModelConfig	PPOConfig
PPOTrainerScriptArgumentsget_kbit_device_mapget_peft_configget_quantization_config)SIMPLE_CHAT_TEMPLATE__main__T)ignore_errors)autoN)revisionattn_implementationtorch_dtype
device_mapquantization_configleft)padding_sidetrust_remote_code	pad_tokenz[PAD]   )r   
num_labels)r   )namenoc                    s     fdd}| j || jtjdS )zFpre-tokenize the dataset before training; only collate during trainingc                    s*    j | d d d ddd}|t|dS )Nmessagesr   FT)paddingadd_generation_prompt)	input_idslengths)apply_chat_templatelen)elementr#   	tokenizer Q/home/ubuntu/.local/lib/python3.10/site-packages/examples/scripts/ppo/ppo_tldr.pytokenize   s   z!prepare_dataset.<locals>.tokenize)remove_columnsnum_proc)mapcolumn_namestraining_argsdataset_num_proc)datasetr)   r,   r*   r(   r+   prepare_dataset   s   r4   c                 C      | d dkS Nr$   i   r*   xr*   r*   r+   <lambda>       r9   )r.   c                 C   r5   r6   r*   r7   r*   r*   r+   r9      r:   r#   z)The last token should not be an EOS token)	argsprocessing_classmodel	ref_modelreward_modelvalue_modeltrain_dataseteval_datasetpeft_config)dataset_name)Cshutiltorch
accelerater   datasetsr   transformersr   r   r   r   trlr   r	   r
   r   r   r   r   trl.trainer.utilsr   __name__parserparse_args_into_dataclassesscript_argsr1   
model_argsrmtree
output_dirr   getattrr   dictmodel_revisionr   model_kwargsfrom_pretrainedmodel_name_or_pathr   r)   add_special_tokenschat_templatereward_model_pathrA   r@   sft_model_pathpolicyrD   
ref_policyrE   dataset_configr3   dataset_train_splitrB   eval_strategydataset_test_splitrC   r4   local_main_process_firstfilterr2   eos_token_idtrainertrain
save_modelpush_to_hubgenerate_completionsr*   r*   r*   r+   <module>   s   $	
&







 	