o
    	Ti_                     @   s  d dl Z d dlmZmZ d dlZd dlZd dlmZ d dl	m
Z
 d dlmZmZ d dlmZmZmZmZmZmZmZmZ d dlmZ d dlmZ d	d
lmZ d	dlmZ d	dlm Z m!Z!m"Z" d	dl#m$Z$ ddl%m&Z& ddl'm(Z( e rd dl)m*Z*m+Z+ d dl,m-Z- d dl.m/Z/ d dl0m1Z1 e rd dl2Z2	d-de3e4 dedededee de5de3e4 fddZ6G dd deZ7G d d! d!eZ8d"ede3e4 d#e3e4 d$e3e4 dej9f
d%d&Z:G d'd( d(eZ;G d)d* d*eZ<G d+d, d,eZ=dS ).    N)OptionalUnion)Accelerator)AcceleratorState)gather_objectis_wandb_available)GenerationConfigPreTrainedModelPreTrainedTokenizerBaseTrainerTrainerCallbackTrainerControlTrainerStateTrainingArguments)
has_length)is_rich_available   maybe_apply_chat_template)is_mergekit_available)MergeConfigmerge_modelsupload_model_to_hf)unwrap_model_for_generation   )BasePairwiseJudge)log_table_to_comet_experiment)ConsoleGroup)Live)Panel)Progresspromptsmodel	tokenizeracceleratorgeneration_config
batch_sizereturnc                 C   s   g }t ||S}tdt| |D ]A}| |||  }	||	dddd|j}
|jdi |
d|i}t|
j|D ]\}}|t|d }|j|dd}|	| q8qW d   |S 1 s]w   Y  |S )	a  
    Generates completions for a list of pre-formatted prompts from the given model.

    Args:
        prompts (list[str]): A list of input prompts for which completions are to be generated.
        model (PreTrainedModel): The pre-trained model to be used for generation.
        tokenizer (PreTrainedTokenizerBase): The tokenizer to be used for encoding and decoding.
        accelerator (Accelerator): The accelerator to be used for model execution.
        generation_config (GenerationConfig): Configuration for text generation.
        batch_size (int, optional): The number of prompts to process in each batch. Default is 1.

    Returns:
        list[str]: A list of generated text completions corresponding to the input prompts.
    r   ptT)return_tensorspadding
truncationr&   N)skip_special_tokens )
r   rangelentodevicegeneratezip	input_idsdecodeappend)r"   r#   r$   r%   r&   r'   completionsunwrapped_modelidxbatchtokenized_batchgenerationsprompt
generation
completionr.   r.   I/home/ubuntu/.local/lib/python3.10/site-packages/trl/trainer/callbacks.py_generate_completions6   s(   
rB   c                   @   sR   e Zd ZdZdeeejjf de	e
 fddZedd Zedd	 Zd
d ZdS )SyncRefModelCallbackzC
    Callback to synchronize the model with a reference model.
    	ref_modelr%   c                 C   s   || _ || _d S N)r%   rD   )selfrD   r%   r.   r.   rA   __init__b   s   
zSyncRefModelCallback.__init__c                 C   s<   t | |  D ]\}}|jd| j|j|d q	d S )N      ?)alpha)r4   
parametersdatamul_add_)r#   target_modelrI   target_param
copy_paramr.   r.   rA   _sync_target_modelj   s   z'SyncRefModelCallback._sync_target_modelc                 C   s   t  j}|d urL|jdkrLdd l}|jjt|  t|  dd |j	 dkr:t
| || W d    d S W d    d S 1 sEw   Y  d S t
| || d S )N   r   )modifier_rank)r   deepspeed_plugin
zero_stage	deepspeedzeroGatheredParameterslistrJ   commget_rankrC   rQ   )r#   rN   rI   rT   rV   r.   r.   rA   sync_target_modelo   s   "z&SyncRefModelCallback.sync_target_modelc                 K   sR   |d }| j d ur%|j|j dkr'| jr| j|}| || j |j d S d S d S )Nr#   r   )rD   global_stepref_model_sync_stepsr%   unwrap_modelr\   ref_model_mixup_alpha)rF   argsstatecontrolkwargsr#   r.   r.   rA   on_step_end}   s   z SyncRefModelCallback.on_step_endN)__name__
__module____qualname____doc__r   r	   torchnnModuler   r   rG   staticmethodrQ   r\   re   r.   r.   r.   rA   rC   ]   s    


rC   c                   @   sT   e Zd ZdZdd Zdd Zdd Zdd	d
Zdd Zdd Z	dddZ
dd ZdS )RichProgressCallbackz`
    A [`TrainerCallback`] that displays the progress of training or evaluation using Rich.
    c                 C   sB   t  stdd | _d | _d | _d | _d | _d | _d | _d | _	d S )NzSRichProgressCallback requires the `rich` extra. To install, run `pip install rich`.)
r   ImportErrortraining_barprediction_bartraining_task_idprediction_task_id
rich_grouprich_consoletraining_statuscurrent_step)rF   r.   r.   rA   rG      s   
zRichProgressCallback.__init__c                 K   st   |j r8t | _t | _t | _| jd| _tt	t
| j| j| j| _| j  | jjd|jd| _d| _d S d S )NzNothing to log yet ...z[blue]Training the modeltotalr   )is_world_process_zeror!   rp   rq   r   ru   statusrv   r   r    r   rt   startadd_task	max_stepsrr   rw   rF   ra   rb   rc   rd   r.   r.   rA   on_train_begin   s   

z#RichProgressCallback.on_train_beginc                 K   s2   |j r| jj| j|j| j dd |j| _d S d S )NTadvanceupdate)rz   rp   r   rr   r]   rw   r   r.   r.   rA   re      s   z RichProgressCallback.on_step_endNc                 K   sN   |j r#t|r%| jd u r| jjdt|d| _| jj| jddd d S d S d S )Nz*[blue]Predicting on the evaluation datasetrx   r   Tr   )rz   r   rs   rq   r}   r0   r   )rF   ra   rb   rc   eval_dataloaderrd   r.   r.   rA   on_prediction_step   s   
z'RichProgressCallback.on_prediction_stepc                 K   0   |j r| jd ur| j| j d | _d S d S d S rE   rz   rs   rq   remove_taskr   r.   r.   rA   on_evaluate      

z RichProgressCallback.on_evaluatec                 K   r   rE   r   r   r.   r.   rA   
on_predict   r   zRichProgressCallback.on_predictc                 K   s>   |j r| jd ur|dd }| jdt|  d S d S d S )N
total_flosz[bold green]Status = )rz   rp   poprv   r   str)rF   ra   rb   rc   logsrd   _r.   r.   rA   on_log   s   zRichProgressCallback.on_logc                 K   sH   |j r"| j  d | _d | _d | _d | _d | _d | _d | _d | _	d S d S rE   )
rz   rt   stoprp   rq   rr   rs   ru   rv   rw   r   r.   r.   rA   on_train_end   s   

z!RichProgressCallback.on_train_endrE   )rf   rg   rh   ri   rG   r   re   r   r   r   r   r   r.   r.   r.   rA   rn      s    

rn   rb   r8   winner_indicesc                 C   sF   t | jgt| }tt||||}dd |D }tj|g ddS )Nc                 S   s6   g | ]}|d  |d |d d  |d d |d fqS )r   r   r   rR   r.   ).0itemr.   r.   rA   
<listcomp>   s   6 z,_win_rate_completions_df.<locals>.<listcomp>)stepr>   reference_modelpolicywinner_index)columns)r   r]   r0   rY   r4   pd	DataFrame)rb   r"   r8   r   r]   rK   
split_datar.   r.   rA   _win_rate_completions_df   s   r   c                   @   sp   e Zd ZdZ				ddededee dee d	e	d
e	fddZ
dededefddZdededefddZdS )WinRateCallbacka  
    A [`~transformers.TrainerCallback`] that computes the win rate of a model based on a reference.

    It generates completions using prompts from the evaluation dataset and compares the trained model's outputs against
    a reference. The reference is either the initial version of the model (before training) or the reference model, if
    available in the trainer. During each evaluation step, a judge determines how often the trained model's completions
    win against the reference using a judge. The win rate is then logged in the trainer's logs under the key
    `"eval_win_rate"`.

    Usage:
    ```python
    trainer = DPOTrainer(...)
    judge = PairRMJudge()
    win_rate_callback = WinRateCallback(judge=judge, trainer=trainer)
    trainer.add_callback(win_rate_callback)
    ```

    Args:
        judge (`BasePairwiseJudge`):
            The judge to use for comparing completions.
        trainer (`Trainer`):
            Trainer to which the callback will be attached. The trainer's evaluation dataset must include a `"prompt"`
            column containing the prompts for generating completions. If the `Trainer` has a reference model (via the
            `ref_model` attribute), it will use this reference model for generating the reference completions;
            otherwise, it defaults to using the initial model.
        generation_config (`GenerationConfig`, *optional*):
            The generation config to use for generating completions.
        num_prompts (`int` or `None`, *optional*, defaults to `None`):
            The number of prompts to generate completions for. If not provided, defaults to the number of examples in
            the evaluation dataset.
        shuffle_order (`bool`, *optional*, defaults to `True`):
            Whether to shuffle the order of the completions before judging.
        use_soft_judge (`bool`, *optional*, defaults to `False`):
            Whether to use a soft judge that returns a win probability between 0 and 1 for the first completion vs the
            second.
    NTFjudgetrainerr&   num_promptsshuffle_orderuse_soft_judgec                 C   sd   || _ || _|| _|| _g | _|| _| jjd u rtd| jj| _|d ur0| jt	|| _d S d S )NzCTrainer must have an evaluation dataset to use the WinRateCallback.)
r   r   r   r&   ref_completionsr   eval_dataset
ValueErrorselectr/   )rF   r   r   r&   r   r   r   r.   r.   rA   rG     s   	
zWinRateCallback.__init__ra   rb   rc   c              	   K   s  |d }d|_ | jj}t| jdd }|d u r| jj}|| jd M}t||||| j|j	d| _
tt| j
| j
}	| jrS| jj||	| jdd}
dd	 |
D }t|
}
n	| j||	| j}t|}t|	}	t|}W d    n1 srw   Y  | jjjrtd
d |D t| }| jrdt|
t|
  }| j||d n| jd|i d|jv rdd l}|jd urt|||	|d}|d|j|di d|jv rt|||	|d}td|d d S d S d S )Nprocessing_classleftrD   r>   r#   r$   r%   r&   r'   Treturn_scoresc                 S      g | ]
}|d kr
dndqS g      ?r   r   r.   r   scorer.   r.   rA   r   ;      z2WinRateCallback.on_train_begin.<locals>.<listcomp>c                 s       | ]}|d kV  qdS r   Nr.   r   
winner_idxr.   r.   rA   	<genexpr>E      z1WinRateCallback.on_train_begin.<locals>.<genexpr>rH   eval_avg_win_probeval_win_rater   wandbr   rb   r"   r8   r   win_rate_completions	dataframecomet_mlwin_rate_completions.csvnametable)padding_sider   r%   getattrmodel_wrappedsplit_between_processesr   rB   r&   per_device_eval_batch_sizer   rY   r4   r   r   r   r   is_main_processsumr0   log	report_tor   runr   Tabler   rF   ra   rb   rc   rd   r$   r%   r#   r"   r8   ref_win_probsr   win_rateavg_win_probr   dfr.   r.   rA   r     sl   	






zWinRateCallback.on_train_beginc              	   K   s  |d }d|_ | jj}| jj}|| jd K}t||||| j|jd}	t	t
| j|	}	| jrF| jj||	| jdd}
dd |
D }t|
}
n	| j||	| j}t|}t|	}	t|}W d    n1 sew   Y  | jjjrtd	d
 |D t| }| jrdt|
t|
  }| j||d n| jd|i d|jv rdd l}|jd urt|||	|d}|d|j|di d|jv rt|||	|d}td|d d S d S d S )Nr   r   r>   r   Tr   c                 S   r   r   r.   r   r.   r.   rA   r   z  r   z/WinRateCallback.on_evaluate.<locals>.<listcomp>c                 s   r   r   r.   r   r.   r.   rA   r     r   z.WinRateCallback.on_evaluate.<locals>.<genexpr>rH   r   r   r   r   r   r   r   r   r   r   )r   r   r%   r   r   r   rB   r&   r   rY   r4   r   r   r   r   r   r   r   r0   r   r   r   r   r   r   r   r   r.   r.   rA   r   d  sh   	






zWinRateCallback.on_evaluate)NNTF)rf   rg   rh   ri   r   r   r   r   intboolrG   r   r   r   r   r   r.   r.   r.   rA   r      s*    )
Fr   c                
   @   sF   e Zd ZdZ			ddedee dee dee fddZd	d
 Z	dS )LogCompletionsCallbacka  
    A [`~transformers.TrainerCallback`] that logs completions to Weights & Biases and/or Comet.

    Usage:
    ```python
    trainer = DPOTrainer(...)
    completions_callback = LogCompletionsCallback(trainer=trainer)
    trainer.add_callback(completions_callback)
    ```

    Args:
        trainer (`Trainer`):
            Trainer to which the callback will be attached. The trainer's evaluation dataset must include a `"prompt"`
            column containing the prompts for generating completions.
        generation_config (`GenerationConfig`, *optional*):
            The generation config to use for generating completions.
        num_prompts (`int` or `None`, *optional*):
            The number of prompts to generate completions for. If not provided, defaults to the number of examples in
            the evaluation dataset.
        freq (`int` or `None`, *optional*):
            The frequency at which to log completions. If not provided, defaults to the trainer's `eval_steps`.
    Nr   r&   r   freqc                 C   s^   || _ || _|| _g | _d| _| j jd u rtd| j j| _|d ur-| jt|| _d S d S )NzJTrainer must have an evaluation dataset to use the LogCompletionsCallback.)	r   r&   r   r   _last_logged_stepr   r   r   r/   )rF   r   r&   r   r   r.   r.   rA   rG     s   
zLogCompletionsCallback.__init__c              	      s>  |j | jkrd S | jp|j}|j | dkrd S |d  d _| jj}| jj}|| j	d %} fdd|D }t
|| || j|jd}	t|	}	t|}W d    n1 sVw   Y  | jjjrt|j gt| }
tt|
||	}| j| tjg d| jd	}d
|jv rtd|i d|jv rtd|d |j | _d S )Nr   r   r   r>   c                    s   g | ]}t d |i d  qS )r>   r   )r   r>   r$   r.   rA   r     s    z6LogCompletionsCallback.on_step_end.<locals>.<listcomp>r   )r   r>   r@   )r   rK   r   r8   r   zcompletions.csvr   )r]   r   r   
eval_stepsr   r   r%   r   r   r   rB   r&   r   r   r   r   r0   rY   r4   r   extendr   r   r   r   r   r   )rF   ra   rb   rc   rd   r   r%   r#   r"   r8   r]   rK   r   r.   r   rA   re     sF   



z"LogCompletionsCallback.on_step_end)NNN)
rf   rg   rh   ri   r   r   r   r   rG   re   r.   r.   r.   rA   r     s    
r   c                   @   sN   e Zd ZdZ			dded dedefdd	Zd
d ZdddZdddZ	dS )MergeModelCallbacka  
    A [`~transformers.TrainerCallback`] that merges the policy model (the model being trained) with another model based
    on a merge configuration.

    Args:
        merge_config ([`MergeConfig`], *optional*, defaults to `None`):
            Configuration used for the merging process. If not provided, the default [`MergeConfig`] is used.
        merge_at_every_checkpoint (`bool`, *optional*, defaults to `False`):
            Whether to merge the model at every checkpoint.
        push_to_hub (`bool`, *optional*, defaults to `False`):
            Whether to push the merged model to the Hub after merging.

    Example:

    ```python
    # pip install mergekit

    from trl.mergekit_utils import MergeConfig
    from trl import MergeModelCallback

    config = MergeConfig()
    merge_callback = MergeModelCallback(config)
    trainer = DPOTrainer(..., callbacks=[merge_callback])
    ```
    NFmerge_configr   merge_at_every_checkpointpush_to_hubc                 C   s*   t  std|pt | _|| _|| _d S )NzYMergeModelCallback requires the `mergekit` extra. To install, run `pip install mergekit`.)r   ro   r   r   r   r   )rF   r   r   r   r.   r.   rA   rG     s   
zMergeModelCallback.__init__c                 C   sz   t j|d| }|| j_| jjd u r|jj| j_t j|d}t| j	 | | j
r;| d| d}t|| d S d S )Nzcheckpoint-mergedz_checkpoint-_merged)ospathjoinr   policy_model_pathtarget_model_pathconfig_name_or_pathr   creater   r   )rF   
output_dirr]   r#   checkpoint_path
merge_path	repo_namer.   r.   rA   _merge_and_maybe_push)  s   z(MergeModelCallback._merge_and_maybe_pushc                 K   s    | j r| |j|j| d S d S rE   r   r   r   r]   rF   ra   rb   rc   r#   rd   r.   r.   rA   on_save6     zMergeModelCallback.on_savec                 K   s    | j s| |j|j| d S d S rE   r   r   r.   r.   rA   r   :  r   zMergeModelCallback.on_train_end)NFFrE   )
rf   rg   rh   ri   r   r   rG   r   r   r   r.   r.   r.   rA   r      s    

r   )r   )>r   typingr   r   pandasr   rj   
accelerater   accelerate.stater   accelerate.utilsr   r   transformersr   r	   r
   r   r   r   r   r   transformers.trainer_utilsr   transformers.utilsr   
data_utilsr   import_utilsr   mergekit_utilsr   r   r   models.utilsr   judgesr   utilsr   rich.consoler   r   	rich.liver   
rich.panelr    rich.progressr!   r   rY   r   r   rB   rC   rn   r   r   r   r   r   r.   r.   r.   rA   <module>   sp   (
	
')P

 E\