o
    	Ti                     @   s4   d dl mZmZ d dlmZ eG dd deZdS )    )	dataclassfield)OnlineDPOConfigc                       sB   e Zd ZU dZedd ddidZee ed<  fdd	Z	  Z
S )
	XPOConfiga  
    Configuration class for the [`XPOTrainer`].

    Subclass of [`OnlineDPOConfig`] we can use all its arguments and add the following:

    Parameters:
        alpha (`float` or `list[float]`, *optional*, defaults to `1e-5`):
            Weight of the XPO loss term. If a list of floats is provided then the alpha is selected for each new epoch
            and the last alpha is used for the rest of the epochs.
    c                   C   s   dgS )Ngh㈵> r   r   r   J/home/ubuntu/.local/lib/python3.10/site-packages/trl/trainer/xpo_config.py<lambda>"   s    zXPOConfig.<lambda>helpzWeight of the XPO loss term. If a list of floats is provided then the alpha is selected for each new epoch and the last alpha is used for the rest of the epochs.)default_factorymetadataalphac                    s<   t    t| jdrt| jdkr| jd | _d S d S d S )N__len__   r   )super__post_init__hasattrr   len)self	__class__r   r   r   )   s   
zXPOConfig.__post_init__)__name__
__module____qualname____doc__r   r   listfloat__annotations__r   __classcell__r   r   r   r   r      s   
 r   N)dataclassesr   r   trl.trainer.online_dpo_configr   r   r   r   r   r   <module>   s   