o
    	Ti^                     @   s  d Z ddlmZ ddlZddlm  mZ ddlm	Z	 ddl
mZ ddlmZmZmZmZmZ ddlmZmZmZmZmZmZ dejd	ejd
efddZedkreeeefZe \ZZZ ddie_!ej"e j#e j$dZ%ej"e j#e j$dZ&ej"e j#e j$dZ'e'j(du re'j)e'_(e'j*du ree%e'\Z%Z'eej+ej,dZ-e	 Z.ej"de j$dej/ddZ0e.1e0Z0ej"de j$dZ2eee0dZ3ee%e&ee-ej4 ej5dkre-ej6 nde'ee e3e2d	Z7e78  e79ej: ej;re7j;ej+d dS dS dS )a  
Run the BCO training script with the commands below. In general, the optimal configuration for BCO will be similar to that of KTO.

# Full training:
python examples/scripts/bco.py     --model_name_or_path Qwen/Qwen2.5-0.5B-Instruct     --trust_remote_code     --dataset_name trl-lib/ultrafeedback-gpt-3.5-turbo-helpfulness     --per_device_train_batch_size 16     --per_device_eval_batch_size 32     --num_train_epochs 1     --learning_rate 1e-6     --gradient_checkpointing     --gradient_accumulation_steps 1     --eval_steps 0.2     --save_strategy no     --output_dir=bco-aligned-model     --logging_first_step     --max_length 2048     --max_prompt_length 1536     --max_completion_length 1024     --no_remove_unused_columns     --warmup_ratio 0.1     --bf16     --report_to wandb

# QLoRA:
python examples/scripts/bco.py     --model_name_or_path=nnheui/stablelm-2-1_6b-sft-full     --per_device_train_batch_size 16     --per_device_eval_batch_size 32     --num_train_epochs 1     --learning_rate 1e-6     --gradient_checkpointing     --gradient_accumulation_steps 1     --eval_steps 0.2     --save_strategy no     --output_dir=bco-aligned-model-lora     --logging_first_step     --warmup_ratio 0.1     --report_to wandb     --max_length 2048     --max_prompt_length 1536     --max_completion_length 1024     --no_remove_unused_columns     --warmup_ratio 0.1     --bf16     --use_peft     --load_in_4bit     --lora_target_modules=all-linear     --lora_r=16     --lora_alpha=16
    )partialN)Accelerator)load_dataset)	AutoModelAutoModelForCausalLMAutoTokenizerHfArgumentParserPreTrainedModel)	BCOConfig
BCOTrainerModelConfigScriptArgumentsget_peft_configsetup_chat_format	input_idsattention_maskmodelc                 C   s   dd }t   || |d}|||}W d   n1 sw   Y  d}tj|ddd}tj||jd fd	}|ddd|f }|S )
zZ
    Borrowed from https://huggingface.co/nomic-ai/nomic-embed-text-v1.5#transformers
    c                 S   sD   | d }| d|  }t|| dtj|ddd S )Nr      g&.>)min)	unsqueezeexpandsizefloattorchsumclamp)model_outputr   token_embeddingsinput_mask_expanded r    H/home/ubuntu/.local/lib/python3.10/site-packages/examples/scripts/bco.pymean_pooling\   s   $z"embed_prompt.<locals>.mean_pooling)r   r   Ni      r   )pdim)normalized_shape)r   no_gradF	normalize
layer_normshape)r   r   r   r"   r   
embeddingsmatryoshka_dimr    r    r!   embed_promptW   s   
r.   __main__use_reentrantT)trust_remote_code)nameznomic-ai/nomic-embed-text-v1.5auto)r1   safe_serializationtorch_dtype
device_mapzbert-base-uncased)r   no)argstrain_dataseteval_datasetprocessing_classpeft_configembedding_funcembedding_tokenizer)dataset_name)<__doc__	functoolsr   r   torch.nn.functionalnn
functionalr(   
accelerater   datasetsr   transformersr   r   r   r   r	   trlr
   r   r   r   r   r   
LongTensorr.   __name__parserparse_args_into_dataclassesscript_argstraining_args
model_argsgradient_checkpointing_kwargsfrom_pretrainedmodel_name_or_pathr1   r   	ref_model	tokenizer	pad_token	eos_tokenchat_templater?   dataset_configdatasetacceleratorbfloat16embedding_modelprepare_modelr>   r=   dataset_train_spliteval_strategydataset_test_splittrainertrain
save_model
output_dirpush_to_hubr    r    r    r!   <module>   sx   6 



=