o
    	Ti?                  	   @   s
  d dl Z d dlmZ d dlmZ d dlmZmZmZm	Z	 d dl
mZmZmZmZ d dlmZ 	 edkre	eeefZe \ZZZe jejdd	 ejejd
ejdZeddi ejdu rcee_ejejejddZ ejej!ejdZ"ejej!ejdZ#eej$ej%dZ&e&ej' Z(ej)dkre&ej* ndZ+dd Z,e - & e,e(eZ(e,e+eZ+e(j.dd ej/dZ(e+j.dd ej/dZ+W d   n1 sw   Y  e(d  d d ej0ksJ deeee#e"e e(e+dZ1e12  e13ej ej4re1j4ej$d e15  dS dS )    N)PartialState)load_dataset)AutoModelForCausalLM"AutoModelForSequenceClassificationAutoTokenizerHfArgumentParser)ModelConfig
RLOOConfigRLOOTrainerScriptArguments)SIMPLE_CHAT_TEMPLATE__main__T)ignore_errorsleft)padding_sidetrust_remote_code	pad_tokenz[PAD]   )r   
num_labels)r   )namenoc                    s     fdd}| j || jtjdS )zFpre-tokenize the dataset before training; only collate during trainingc                    s*    j | d d d ddd}|t|dS )Nmessagesr   FT)paddingadd_generation_prompt)	input_idslengths)apply_chat_templatelen)elementr   	tokenizer S/home/ubuntu/.local/lib/python3.10/site-packages/examples/scripts/rloo/rloo_tldr.pytokenizej   s   z!prepare_dataset.<locals>.tokenize)remove_columnsnum_proc)mapcolumn_namestraining_argsdataset_num_proc)datasetr    r#   r!   r   r"   prepare_datasetg   s   r+   c                 C      | d dkS Nr   i   r!   xr!   r!   r"   <lambda>~       r0   )r%   c                 C   r,   r-   r!   r.   r!   r!   r"   r0      r1   r   z)The last token should not be an EOS token)configprocessing_classpolicy
ref_policyreward_modeltrain_dataseteval_dataset)dataset_name)6shutil
accelerater   datasetsr   transformersr   r   r   r   trlr   r	   r
   r   trl.trainer.utilsr   __name__parserparse_args_into_dataclassesscript_argsr(   
model_argsrmtree
output_dirfrom_pretrainedmodel_name_or_pathr   r    add_special_tokenschat_templatereward_model_pathr7   sft_model_pathr6   r5   r:   dataset_configr*   dataset_train_splitr8   eval_strategydataset_test_splitr9   r+   local_main_process_firstfilterr)   eos_token_idtrainertrain
save_modelpush_to_hubgenerate_completionsr!   r!   r!   r"   <module>   sh   
$





	