o
    	Ti4                     @   s4   d dl mZmZ d dlmZ eG dd deZdS )    )	dataclassfield)OnlineDPOConfigc                       sB   e Zd ZU dZedd ddidZee ed<  fdd	Z	  Z
S )
NashMDConfiga  
    Configuration class for the [`NashMDTrainer`].

    Subclass of [`OnlineDPOConfig`] we can use all its arguments and add the following:

    Parameters:
        mixture_coef (`float` or `list[float]`, *optional*, defaults to `0.5`):
            Logit mixture coefficient for the model and reference model. If a list of floats is provided then the
            mixture coefficient is selected for each new epoch and the last coefficient is used for the rest of the
            epochs.
    c                   C   s   dgS )Ng      ? r   r   r   N/home/ubuntu/.local/lib/python3.10/site-packages/trl/trainer/nash_md_config.py<lambda>#   s    zNashMDConfig.<lambda>helpzLogit mixture coefficient for the model and reference model. If a list of floats is provided then the mixture coefficient is selected for each new epoch and the last coefficient is used for the rest of the epochs.)default_factorymetadatamixture_coefc                    s<   t    t| jdrt| jdkr| jd | _d S d S d S )N__len__   r   )super__post_init__hasattrr   len)self	__class__r   r   r   +   s   
zNashMDConfig.__post_init__)__name__
__module____qualname____doc__r   r   listfloat__annotations__r   __classcell__r   r   r   r   r      s   
 	r   N)dataclassesr   r   trl.trainer.online_dpo_configr   r   r   r   r   r   <module>   s   