o
    پiY                  
   @   s  d dl Z d dlmZmZmZmZmZ d dlmZ d dl	m
Z
 d dlmZmZmZmZmZmZmZ d dlmZ d dlmZmZ e eZdeeef fd	d
ZdedefddZ dedefddZ!dedefddZ"dedee dedefddZ#de$de$de$fddZ%dd Z&dedededee def
dd Z'dedeeeef  deeeef  dee def
d!d"Z(d#edefd$d%Z)G d&d' d'eZ*dS )(    N)AnyDictListOptionalUnion)Request)ORJSONResponse)%ChatCompletionMessageContentImagePart$ChatCompletionMessageContentTextPart%ChatCompletionMessageContentVideoPartErrorResponseRerankContentRerankResponseV1RerankReqInput)OpenAIServingBase)EmbeddingReqInputGenerateReqInputreturnc              
   C   s   z<| j ddd}| j ddd}t|dkr$t|dkr$|d |d fW S | d}| d}|dur;|dur;||fW S W n tyV } ztd|  W Y d}~nd}~ww td	 d
S )zGet token IDs for 'yes' and 'no' from the tokenizer.

    Different model sizes may have different token IDs, so we look them up dynamically.
    yesF)add_special_tokensno   r   Nz,Failed to get yes/no token IDs dynamically: z/Using fallback token IDs for yes/no (9693/2152))i%  ih  )encodelenconvert_tokens_to_ids	Exceptionloggerwarning)	tokenizer
yes_tokens	no_tokensyes_idno_ide r$   `/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/entrypoints/openai/serving_rerank.py_get_yes_no_token_ids   s    



r&   chat_templatec                 C   s0   | sdS |   }d|v pd|v od|v od|v S )z<Detect if the chat template is for Qwen3 text-only reranker.F answer can only be "yes" or "no"answer can only be"yes""no"lower)r'   tr$   r$   r%   _is_qwen3_reranker_template2   s   r/   c                 C   sH   | sdS |   }d|v pd|v od|v od|v }d|v pd|v }|o#|S )zDetect if the chat template is for Qwen3-VL multimodal reranker.

    VL reranker templates use `query` and `document` as jinja variables
    and include vision token placeholders for image/video support.
    Fr(   r)   r*   r+   z<|vision_start|>z<|image_pad|>r,   )r'   r.   has_reranker_phrasehas_vision_tokensr$   r$   r%   _is_qwen3_vl_reranker_template<   s   r2   
model_pathc                 C   s    | sdS |   }d|v pd|v S )z;Check if the model is a Qwen3-VL model based on model path.Fzqwen3-vlqwen3vlr,   )r3   model_lowerr$   r$   r%   _is_qwen3_vl_modelN   s   r6   requestc                 C   s@   |   }t|}t|}t|}|s|s|r|rdS |rdS dS )z
    Unify rerank routing decisions used by both `_convert_to_internal_request` and
    `_handle_non_streaming_request`.

    Returns:
        "vl_decoder" | "text_decoder" | "cross_encoder"
    
vl_decodertext_decodercross_encoder)is_multimodalr6   r2   r/   )r7   r'   r3   r;   is_vl_modelis_vl_templateis_text_templater$   r$   r%   _detect_rerank_backendV   s   r?   p_yesp_noc                 C   s   | | }|dkr
dS | | S )N        r$   )r@   rA   denomr$   r$   r%   _qwen3_rerank_scorep   s   rD   c               
   C   sF   zdd l } W n ty } ztd|d }~ww | j|  d| jdS )Nr   zRendering Qwen3 reranker prompts requires `jinja2`. Please install it in your runtime environment (e.g., `pip install jinja2`).F)loader
autoescape	undefined)jinja2ModuleNotFoundError
ValueErrorEnvironment
BaseLoader	Undefined)rH   r#   r$   r$   r%   _get_jinja_envw   s   rN   querydocumentinstructc          	      C   sp   t  }|| }t|tr|nt|}t|tr|nt|}dd|dd|dgi}|r0||d< |jdi |S )zKRender a loaded Jinja chat template for Qwen3 reranker prompts (text-only).messagesuser)rolecontentrQ   Nr$   )rN   from_string
isinstancestr_extract_text_from_contentrender)	r'   rO   rP   rQ   envtemplate
query_textdoc_textrender_kwargsr$   r$   r%   _render_jinja_chat_template   s   
r`   c                C   s6   t  }|| }||d}|r||d< |jdi |S )zRender a loaded Jinja chat template for Qwen3-VL reranker prompts (multimodal).

    The template expects `query` and `document` as lists of content parts,
    where each part has a `type` field (text, image, video) and corresponding data.
    rO   rP   rQ   Nr$   )rN   rV   rZ   )r'   rO   rP   rQ   r[   r\   r_   r$   r$   r%   _render_vl_jinja_template   s   
rb   rU   c                 C   sh   t | tr| S g }| D ]#}t |tr||j qt |tr.|ddkr.||dd qd|S )z%Extract text from multimodal content.typetext  )rW   rX   r
   appendrd   dictgetjoin)rU   textspartr$   r$   r%   rY      s   


rY   c                       s  e Zd ZdZd( fdd	ZdefddZdedee fd	d
Z		d(dede
deeeef ef fddZdeeef dede
deee eef fddZdede
dee dedeeee eef  f
ddZdede
dedeee ef fddZdede
dedeee ef fddZdededeeeeef  eeeef  ee ee f fddZdedee d ee deeeef  fd!d"Zd#eeef defd$d%Zd#eeeeef  ee f dedee fd&d'Z  ZS ))OpenAIServingRerankzHandler for /v1/rerank requestsNc                    s(   t  | || _t|j\| _| _d S N)super__init__template_managerr&   r   _yes_token_id_no_token_id)selftokenizer_managerrq   	__class__r$   r%   rp      s
   zOpenAIServingRerank.__init__r   c                 C   s   dS )Nzrerank-r$   )rt   r$   r$   r%   _request_id_prefix   s   z&OpenAIServingRerank._request_id_prefixr7   c                 C   s`   |j sdS t|j tr|j  sdS |jsdS |jD ]}|s! dS t|tr-| s- dS qdS )z*Validate rerank request format and contentzQuery cannot be emptyz(Query cannot be empty or whitespace onlyzDocuments cannot be emptyz(Each document must be a non-empty stringz0Each document cannot be empty or whitespace onlyN)rO   rW   rX   strip	documents)rt   r7   docr$   r$   r%   _validate_request   s   

z%OpenAIServingRerank._validate_requestraw_requestc           	         s   | j jj}t| j jdd}tt|tr|nd|d}|dv r#fS  r>t	j
 dd jD } fdd|D }n
fd	djD }t|d
d}|fS )ay  
        Convert OpenAI rerank request to internal format.

        - For Qwen3-VL reranker (multimodal decoder-only): keep the request.
        - For Qwen3 reranker (text-only decoder-only): keep the request and score via
          `tokenizer_manager.score_prompts(...)` in the handler.
        - For cross-encoder rerank models: adapt into `EmbeddingReqInput` pairs.
        r3   re   Nr7   r'   r3   )r8   r9   c                 S   s   g | ]}t |qS r$   )rY   .0r{   r$   r$   r%   
<listcomp>      zDOpenAIServingRerank._convert_to_internal_request.<locals>.<listcomp>c                    s   g | ]} |gqS r$   r$   r   )r]   r$   r%   r     r   c                    s   g | ]} j |gqS r$   )rO   r   )r7   r$   r%   r     s    T)rd   is_cross_encoder_request)ru   r   r'   getattrmodel_configr?   rW   rX   r;   rY   rO   rz   r   )	rt   r7   r}   r'   r3   backend	doc_textspairsadapted_requestr$   )r]   r7   r%   _convert_to_internal_request   s    

z0OpenAIServingRerank._convert_to_internal_requestr   c           
   
      s   t | jjdd}t | jjdd}| j||||dI dH }|dur#|S zt|ts-td| j||	 I dH }W n tyS } z| 
t|W  Y d}~S d}~ww t|ts\|g}| ||}	|	S )zHandle the rerank requestr'   Nr3   re   )r7   r}   r'   r3   zInvalid rerank request adaptation. If you are serving a decoder-only reranker (e.g., Qwen3-Reranker), please provide the corresponding --chat-template and launch without --is-embedding.)r   ru   r   r   _handle_rerank_pathsrW   r   rJ   generate_request	__anext__create_error_responserX   list_build_rerank_response)
rt   r   r7   r}   r'   r3   
rerank_retretr#   	responsesr$   r$   r%   _handle_non_streaming_request  s:   

z1OpenAIServingRerank._handle_non_streaming_requestr'   r3   c                   sV   t |||d}|dkr| |||pdI dH S |dkr)| j|||p#ddI dH S dS )z
        Handle decoder-only rerank paths (VL/text) and return a response if matched.

        Returns None if the request should fall back to cross-encoder rerank.
        r~   r8   re   Nr9   )r7   r}   r'   )r?   _handle_vl_reranker_request_handle_text_reranker_request)rt   r7   r}   r'   r3   r   r$   r$   r%   r   <  s"   

z(OpenAIServingRerank._handle_rerank_pathsc          	   
      s   | j jjs| dS z% fddjD }| j j|| j| jgd|dI dH }dd |D }W n1 tyI } z| t	|W  Y d}~S d}~w t
ya } z| t	|W  Y d}~S d}~ww | |}|S )z>Handle text-only decoder reranker request via score_prompts().zDetected Qwen3 reranker chat template, but the server is not in generation mode. Please relaunch without --is-embedding for Qwen3-Reranker models.c              
      s&   g | ]}t  j|td ddqS )rQ   N)rO   rP   rQ   )r`   rO   r   r   r'   r7   r$   r%   r   q  s    
zEOpenAIServingRerank._handle_text_reranker_request.<locals>.<listcomp>F)label_token_idsapply_softmaxr7   Nc                 S   s   g | ]}t |d  |d qS )r   r   )rD   )r   pr$   r$   r%   r     s    )ru   r   is_generationr   rz   score_promptsrr   rs   rJ   rX   r   r   )	rt   r7   r}   r'   promptsprobsscoresr#   r   r$   r   r%   r   _  s2   


z1OpenAIServingRerank._handle_text_reranker_request_chat_templatec              
      s2  | j jjs| dS zWg }t|dd}|jD ]B}| j|j|d\}}}	}
t||||d}t	||	r3|	nd|
r8|
nddddd	d
dd}| j 
|| I dH }| |}|| q| ||}|W S  ty{ } z| t|W  Y d}~S d}~w ty } ztd | t|W  Y d}~S d}~ww )zJHandle multimodal VL reranker request using chat completion with logprobs.zDetected Qwen3-VL reranker, but the server is not in generation mode. Please relaunch without --is-embedding for Qwen3-VL-Reranker models.rQ   Nra   )r'   rO   rP   rQ   r   r   )max_new_tokenstemperatureT2   )rd   
image_data
video_datasampling_paramsreturn_logprobtop_logprobs_numlogprob_start_lenz"Error handling VL reranker request)ru   r   r   r   r   rz   _build_vl_reranker_contentrO   rb   r   r   r   _extract_score_from_logprobsrg   r   rJ   rX   r   r   	exception)rt   r7   r}   r   r   rQ   r{   query_contentdoc_contentr   r   promptgen_requestr   scorer   r#   r$   r$   r%   r     s`   






z/OpenAIServingRerank._handle_vl_reranker_requestrO   rP   c                 C   s0   g }g }|  |||}|  |||}||||fS )zBuild content lists for VL reranker request.

        Returns:
            Tuple of (query_content, document_content, image_data, video_data)
            where query_content and document_content are lists suitable for jinja template.
        )_content_to_template_list)rt   rO   rP   r   r   r   r   r$   r$   r%   r     s
   z.OpenAIServingRerank._build_vl_reranker_contentrU   r   r   c           
      C   sv  g }t |tr|d|d |S |D ]}t |tr$|d|jd qt |tr;|jr:||jj |ddi qt |trR|j	rQ||j	j |ddi qt |t
r|d}|dkrm|d|ddd q|dkr|di }t |t
r|d}n|}|r|| |ddi q|d	kr|d	i }	t |	t
r|	d}n|	}|r|| |ddi q|S )
zCConvert RerankContent to a list format suitable for jinja template.rd   )rc   rd   rc   imagevideore   	image_urlurl	video_url)rW   rX   rg   r
   rd   r	   r   r   r   r   rh   ri   )
rt   rU   r   r   resultrl   	part_typer   r   r   r$   r$   r%   r     sP   









z-OpenAIServingRerank._content_to_template_listr   c                 C   s   ddl }|di }|dg }|r|d ng }d}d}|D ] }|d |d }	}
|
| jkr4||	}q|
| jkr>||	}qt||S )z?Extract reranking score from generation response with logprobs.r   N	meta_infooutput_top_logprobsrB   r   )mathri   rr   exprs   rD   )rt   r   r   r   r   top_logprobsr@   rA   itemlogprobtoken_idr$   r$   r%   r     s   



z0OpenAIServingRerank._extract_score_from_logprobsc              
   C   s  g }t |D ]_\}}t|trQ|d}t|tr8t|dks(t|d ttfs2td| d|t|d }|	t
t||jrF|j| nd||dd q|	t
t||jr_|j| nd|d q|jd	d
 dd |jdur|jdkr|d|j }|S )z1Build the rerank response from generation results	embeddingr   z,Invalid embedding score for rerank at index z: Nr   )r   rP   indexr   )r   rP   r   c                 S   s   | j S rn   )r   )xr$   r$   r%   <lambda>T  s    z<OpenAIServingRerank._build_rerank_response.<locals>.<lambda>T)keyreverse)	enumeraterW   rh   ri   r   r   intfloatrJ   rg   r   return_documentsrz   sorttop_n)rt   r   r7   r   idxr   	score_valr$   r$   r%   r   -  s>   


z*OpenAIServingRerank._build_rerank_responsern   ) __name__
__module____qualname____doc__rp   rX   rx   r   r   r|   r   tupler   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   __classcell__r$   r$   rv   r%   rm      s    
$

&
#
,
B.

1rm   )+loggingtypingr   r   r   r   r   fastapir   fastapi.responsesr   &sglang.srt.entrypoints.openai.protocolr	   r
   r   r   r   r   r   *sglang.srt.entrypoints.openai.serving_baser   sglang.srt.managers.io_structr   r   	getLoggerr   r   r   r   r&   rX   boolr/   r2   r6   r?   r   rD   rN   r`   rb   rY   rm   r$   r$   r$   r%   <module>   s^    $	




