o
    پi                     @   sj   d Z ddlmZ ddlmZ ddlmZ ddlmZm	Z	 ddl
mZ ddlmZ G dd	 d	eZegZd
S )z3Qwen3 Reward Model for RLHF and best-of-N sampling.    )Optional)nn)Qwen2Config)PoolerPoolingType)QuantizationConfig)Qwen3ForPooledOutputc                	       s<   e Zd ZdZ		d
dedee deddf fdd	Z  Z	S )Qwen3ForRewardModelz:Qwen3 Reward Model with 2-layer MLP scoring head for RLHF.N configquant_configprefixreturnc                    sV   t  ||| d| _tt|j|jt t|j| j| _t	t
jdd| _d S )N   F)pooling_type	normalize)super__init__
num_labelsr   
SequentialLinearhidden_sizeReLUscorer   r   LASTpooler)selfr   r   r   	__class__ N/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/qwen3_rm.pyr      s   zQwen3ForRewardModel.__init__)Nr
   )
__name__
__module____qualname____doc__r   r   r   strr   __classcell__r   r   r   r    r	      s    r	   N)r$   typingr   torchr   transformersr   sglang.srt.layers.poolerr   r   *sglang.srt.layers.quantization.base_configr   &sglang.srt.models.qwen3_classificationr   r	   
EntryClassr   r   r   r    <module>   s   