o
    	Ti                  
   @   sd  d Z ddlZddlmZ ddlmZmZmZmZ ddl	m
Z
mZmZmZmZmZmZmZmZmZmZ ddlmZ eee
dZedkr.eeeefZe \ZZZd	d
ie_ejdv r^ejne eejZeeZ!e"ej#ej$eej%rsdnd
e!dur{e nde!dZ&ej'ej(fdej)ie&Z*ej'ej(fdej)ie&Z+ej,durej'ej,fdej)de&Z-ndZ-ej.dureej. Z/e/ Z.ndZ.ej'ej(dej)dZ0e0j1du re0j2e0_1e0j3du ree0_3eej4ej5dZ6ee*e+e-e.ee6ej7 ej8dkre6ej9 nde0dZ:ej8dkreej;d
ej<dZ=ee:e=ddZ>e:?e> e:@  e:AejB ejCr0e:jCej4d dS dS dS )a  
Usage:

python examples/scripts/nash_md.py     --model_name_or_path trl-lib/pythia-1b-deduped-tldr-sft      --reward_model_path trl-lib/pythia-1b-deduped-tldr-rm     --dataset_name trl-lib/tldr     --learning_rate 5.0e-7     --output_dir pythia-1b-tldr-nash-md     --per_device_train_batch_size 4     --gradient_accumulation_steps 32     --num_train_epochs 3     --max_new_tokens 64     --warmup_ratio 0.1     --missing_eos_penalty 1.0     --push_to_hub


accelerate launch --config_file examples/accelerate_configs/deepspeed_zero2.yaml     examples/scripts/nash_md.py     --model_name_or_path trl-lib/pythia-1b-deduped-tldr-sft      --reward_model_path trl-lib/pythia-1b-deduped-tldr-rm     --dataset_name trl-lib/tldr     --learning_rate 5.0e-7     --output_dir pythia-1b-tldr-nash-md     --per_device_train_batch_size 4     --gradient_accumulation_steps 32     --num_train_epochs 3     --max_new_tokens 64     --warmup_ratio 0.1     --missing_eos_penalty 1.0     --push_to_hub
    N)load_dataset)AutoModelForCausalLM"AutoModelForSequenceClassificationAutoTokenizerGenerationConfig)HfPairwiseJudgeLogCompletionsCallbackModelConfigNashMDConfigNashMDTrainerOpenAIPairwiseJudgePairRMJudgeScriptArguments	TrlParserget_kbit_device_mapget_quantization_config)SIMPLE_CHAT_TEMPLATE)pair_rmopenaihf__main__use_reentrantT)autoNF)revisionattn_implementationtorch_dtype	use_cache
device_mapquantization_configtrust_remote_code   )
num_labelsr   left)padding_sider   )nameno)model	ref_modelreward_modeljudgeargstrain_dataseteval_datasetprocessing_class)max_new_tokens	do_sampletemperature   )num_prompts)dataset_name)D__doc__torchdatasetsr   transformersr   r   r   r   trlr   r   r	   r
   r   r   r   r   r   r   r   trl.trainer.utilsr   JUDGES__name__parserparse_args_and_configscript_argstraining_args
model_argsgradient_checkpointing_kwargsr   getattrr   dictmodel_revisionr   gradient_checkpointingmodel_kwargsfrom_pretrainedmodel_name_or_pathr   r&   r'   reward_model_pathr(   r)   	judge_cls	tokenizer	pad_token	eos_tokenchat_templater3   dataset_configdatasetdataset_train_spliteval_strategydataset_test_splittrainerr.   r0   generation_configcompletions_callbackadd_callbacktrain
save_model
output_dirpush_to_hub r\   r\   L/home/ubuntu/.local/lib/python3.10/site-packages/examples/scripts/nash_md.py<module>   s   "4

	







I