o
    	Tix                     @   s  d Z ddlmZ ddlmZmZmZ g dg ddgg dg d	g d
g ddZze s0e W n	 ey:   Y nw ed g d ed ddg er+ddl	m
Z
mZmZmZmZmZmZmZmZmZ ddlmZ ddlmZmZmZmZmZmZmZ ddlmZm Z m!Z! dd	l"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9m:Z:m;Z;m<Z<m=Z=m>Z>m?Z?m@Z@mAZAmBZBmCZCmDZDmEZEmFZFmGZGmHZHmIZImJZJmKZKmLZLmMZMmNZNmOZOmPZPmQZQmRZR ddlSmTZTmUZU ddlVmWZWmXZXmYZY z	e se W n ey   Y dS w ddlmZZZm[Z[m\Z\m]Z] ddl"m^Z^m_Z_ dS ddl`Z`eeaeb d eecde ide`jdea< dS )z0.21.0    )TYPE_CHECKING   )OptionalDependencyNotAvailable_LazyModuleis_diffusers_available)init_zero_verboseScriptArguments	TrlParser)
apply_chat_templateextract_promptis_conversationalmaybe_apply_chat_templatemaybe_convert_to_chatmlmaybe_extract_promptmaybe_unpair_preference_datasetpack_datasettruncate_datasetunpair_preference_datasetBestOfNSampler)SUPPORTED_ARCHITECTURES!AutoModelForCausalLMWithValueHead"AutoModelForSeq2SeqLMWithValueHeadPreTrainedModelWrapperclone_chat_templatecreate_reference_modelsetup_chat_format)0AlignPropConfigAlignPropTrainerAllTrueJudgeBaseBinaryJudge	BaseJudgeBasePairwiseJudgeBaseRankJudge	BCOConfig
BCOTrainer	CPOConfig
CPOTrainer	DPOConfig
DPOTrainerFDivergenceConstantsFDivergenceType	GKDConfig
GKDTrainer
GRPOConfigGRPOTrainerHfPairwiseJudgeIterativeSFTConfigIterativeSFTTrainer	KTOConfig
KTOTrainerLogCompletionsCallbackMergeModelCallbackModelConfigNashMDConfigNashMDTrainerOnlineDPOConfigOnlineDPOTrainerOpenAIPairwiseJudge
ORPOConfigORPOTrainerPairRMJudge	PPOConfig
PPOTrainer	PRMConfig
PRMTrainerRewardConfigRewardTrainer
RLOOConfigRLOOTrainer	SFTConfig
SFTTrainerWinRateCallback	XPOConfig
XPOTrainer)r5   RichProgressCallbackSyncRefModelCallback)get_kbit_device_mapget_peft_configget_quantization_config)scripts
data_utilsextrasmodelstrainerztrainer.callbacksztrainer.utilsrT   )DDPOPipelineOutputDDPOSchedulerOutputDDPOStableDiffusionPipeline"DefaultDDPOStableDiffusionPipelinerU   
DDPOConfigDDPOTrainer)r   )r   r	   r   )rL   rM   )rZ   r[   N__file____version__)module_specextra_objects)er]   typingr   import_utilsr   r   r   _import_structureextendrR   r
   r   r   r   r   r   r   r   r   r   rS   r   rT   r   r   r   r   r   r   r   rQ   r   r	   r   rU   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   trainer.callbacksrL   rM   trainer.utilsrN   rO   rP   rV   rW   rX   rY   rZ   r[   sys__name__globals__spec__modules rk   rk   @/home/ubuntu/.local/lib/python3.10/site-packages/trl/__init__.py<module>   s^   	2N0$	2