o
    پi=                     @   sT   d dl Z d dlZd dlmZmZmZmZmZ d dlm	Z	 e 
eZG dd dZdS )    N)AnyDictListOptionalUnion)GenerateReqInputc                   @   s  e Zd Z		ddeeee eee  f dee dedee	 deee
  f
dd	Zd
d Zdee deee  dedee fddZ	d de	dedee dedeee
  f
ddZde	dee dedeee
  fddZ						d!deeeee f  deeeee eee  f  deee  dededee	 deee
  fddZdeee
f dee dedee
 fddZdedee deee
f fddZdS )"TokenizerManagerMultiItemMixinFNpromptslabel_token_idsapply_softmaxrequestreturnc                    s   t |tst |tr"|rt |d tr"| jd|||d|dI dH S t |tr>|r0t |d tr>| jg |||d|dI dH S td)a  
        Score probabilities of specified token IDs after each *full prompt*.

        This is a thin wrapper over `score_request` that treats `prompts` as
        already-composed inputs (i.e., no query/item concatenation needed).

        Args:
            prompts: A single prompt string, a list of prompt strings, or a list of
                pre-tokenized prompt token ID sequences.
            label_token_ids: Token IDs to compute probabilities for.
            apply_softmax: Whether to normalize probabilities using softmax.
            request: Optional FastAPI request object.

        Returns:
            List of score lists, one for each prompt, each in the order of label_token_ids.
        r    F)queryitemsr
   r   
item_firstr   Nz'Invalid prompts type for score_prompts.)
isinstancestrlistscore_request
ValueError)selfr	   r
   r   r    r   i/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/managers/tokenizer_manager_multiitem_mixin.pyscore_prompts   s4   

	z,TokenizerManagerMultiItemMixin.score_promptsc              
   C   s   t | jdrD| jjdurF| jdurHz| jj| jjgdd| _W dS  tyC } ztd| jj d|  d| _W Y d}~dS d}~ww dS dS dS )zMInitialize multi-item delimiter text from token ID after tokenizer is loaded.multi_item_scoring_delimiterNF)skip_special_tokensz!Failed to decode delimiter token z: )	hasattrserver_argsr   	tokenizerdecodemulti_item_delimiter_text	Exceptionloggerwarning)r   er   r   r   %_initialize_multi_item_delimiter_text<   s&   

zDTokenizerManagerMultiItemMixin._initialize_multi_item_delimiter_textr   r   delimiter_token_idc                 C   s8   |dd }|D ]}| | || q| | |S )ap  
        Build a single token sequence for multi-item scoring.
        Format: query<delimiter>item1<delimiter>item2<delimiter>item3<delimiter>

        Args:
            query: Query token IDs
            items: List of item token ID sequences
            delimiter_token_id: Token ID to use as delimiter

        Returns:
            Combined token sequence
        N)appendextend)r   r   r   r'   combined_sequenceitemr   r   r    _build_multi_item_token_sequenceN   s   

z?TokenizerManagerMultiItemMixin._build_multi_item_token_sequenceresultsc                 C   s   t |tr	|d n|}|d dg }|s#td|d dd dg }t |tr.t|nd}	|	d }
t||
krRtd	|
 d
|	 dt| d|d dd t|dkrZdnd}t|	D ]}|| }|| }| ||}| |||}|| q`|S )a  
        Process results from multi-item scoring request.
        Extracts logprobs at delimiter positions from input_token_ids_logprobs.

        Args:
            results: Results from generate_request
            items: List of items being scored
            label_token_ids: Token IDs to extract scores for
            apply_softmax: Whether to apply softmax normalization
            batch_request: The original batch request containing input sequence

        Returns:
            List of score lists, one for each item
        r   	meta_infoinput_token_ids_logprobszAinput_token_ids_logprobs is empty for multi-item scoring request id	<unknown>zV. This indicates token_ids_logprobs were not computed properly for Mutil Item Scoring.   z	Expected z6 input_token_ids_logprobs for multi-item scoring with z items, but got z. Request ID: )	r   r   getRuntimeErrorlenrange_extract_logprobs_for_tokens_convert_logprobs_to_scoresr(   )r   r-   r   r
   r   batch_requestsingle_resultinput_logprobsscores	num_itemsexpected_logprobs_count	start_idxitem_idxlogprob_idxitem_logprobs_datalogprobs
score_listr   r   r   #_process_multi_item_scoring_resultsh   s>   zBTokenizerManagerMultiItemMixin._process_multi_item_scoring_resultsc           	      C   sv   g }|D ]4}|d  dg }|rt|dkr$td|d  dd d| |d |}| |||}|| q|S )a  
        Process results from single-item scoring request.
        Single-item scoring results are stored in output_token_ids_logprobs.

        Args:
            results: Results from generate_request
            label_token_ids: Token IDs to extract scores for
            apply_softmax: Whether to apply softmax normalization

        Returns:
            List of score lists, one for each result
        r.   output_token_ids_logprobsr   z%output_logprobs is empty for request r0   r1   .)r3   r5   r4   r7   r8   r(   )	r   r-   r
   r   r<   resultoutput_logprobsrC   rD   r   r   r   $_process_single_item_scoring_results   s   zCTokenizerManagerMultiItemMixin._process_single_item_scoring_resultsr   c                    s  |du r	t d| jdur&| jj}|D ]}||kr%t d| d| dq| jjduo0| jdu}	t|d|	r8dndd	d
did}
t trt|tsXt|t	r|rXt|d trt|tr`|gn|}|	r{| j}|
|}  | | | }|g|
_n[|r fdd|D }n	 fdd|D }||
_nBt t	rt|t	r|rt|d t	r|	r| jj}|  ||}|g|
_n|rŇ fdd|D }n	 fdd|D }||
_nt d| |
| I dH }|	r| |||||
S | |||S )a  
        Score the probability of specified token IDs appearing after the given (query + item) pair.

        This method supports two scoring approaches:
        1. Single-Item scoring (default): Process each query+item pair independently
        2. Multi-Item scoring: When multi_item_scoring_delimiter is set, combine query and
           multiple items into a single sequence using delimiter for efficient processing.
           Note: item_first parameter is ignored in multi-item scoring mode since it uses
           a fixed format: query<delimiter>item1<delimiter>item2<delimiter>item3<delimiter>

           Multi-item scoring works with both text and pre-tokenized inputs:
           - Text: query<delimiter_text>item1<delimiter_text>item2<delimiter_text>item3<delimiter_text>
           - Tokens: query<delimiter_token_id>item1<delimiter_token_id>item2<delimiter_token_id>item3<delimiter_token_id>

        Args:
            query: The query text or pre-tokenized query token IDs
            items: The item text(s) or pre-tokenized item token IDs
            label_token_ids: List of token IDs to compute probabilities for
            apply_softmax: Whether to normalize probabilities using softmax
            item_first: If True, prepend items to query. Ignored for multi-item scoring.
            request: Optional FastAPI request object

        Returns:
            List of lists containing probabilities for each item and each label token
        Nz label_token_ids must be providedz	Token ID z# is out of vocabulary (vocab size: )Tr   Fmax_new_tokens)token_ids_logprobreturn_logproblogprob_start_lenstreamsampling_paramsc                    s   g | ]}|   qS r   r   .0r+   r   r   r   
<listcomp>      z@TokenizerManagerMultiItemMixin.score_request.<locals>.<listcomp>c                    s   g | ]}  | qS r   r   rS   rU   r   r   rV     rW   c                    s   g | ]}|  qS r   r   rS   rU   r   r   rV   3      c                    s   g | ]} | qS r   r   rS   rU   r   r   rV   5  rX   z;Invalid combination of query/items types for score_request.)r   r   
vocab_sizer   r   r!   r   r   r   r   jointextr,   	input_idsgenerate_request	__anext__rE   rJ   )r   r   r   r
   r   r   r   rY   token_iduse_multi_item_scoringr9   
items_list	delimitercombined_itemssingle_promptr	   r'   combined_input_idsinput_ids_listr-   r   rU   r   r      s   "







z,TokenizerManagerMultiItemMixin.score_requestrC   c                    sL   ddl } fdd|D }|r|j||dd }|S dd |D }|S )a\  
        Convert logprobs dictionary to ordered score list.

        Args:
            logprobs: Dictionary mapping token_id to logprob
            label_token_ids: Token IDs in desired order
            apply_softmax: Whether to apply softmax normalization

        Returns:
            List of scores in the same order as label_token_ids
        r   Nc                    s   g | ]
}  |td qS )-inf)r3   float)rT   r_   rC   r   r   rV   \  s    zNTokenizerManagerMultiItemMixin._convert_logprobs_to_scores.<locals>.<listcomp>)dimc                 S   s&   g | ]}|t d krt|ndqS )rg   g        )rh   mathexp)rT   xr   r   r   rV   d  s    )torchsoftmaxtensortolist)r   rC   r
   r   rn   rD   r   ri   r   r8   I  s   
z:TokenizerManagerMultiItemMixin._convert_logprobs_to_scoreslogprobs_datac                 C   s,   i }|r|D ]\}}}||v r|||< q|S )a%  
        Extract logprobs for specified token IDs from logprobs data.

        Args:
            logprobs_data: List of (logprob, token_id, text) tuples
            label_token_ids: Token IDs to extract logprobs for

        Returns:
            Dictionary mapping token_id to logprob
        r   )r   rr   r
   rC   logprobr_   _r   r   r   r7   j  s   z;TokenizerManagerMultiItemMixin._extract_logprobs_for_tokens)FN)N)NNNFFN)__name__
__module____qualname__r   r   r   intboolr   r   rh   r   r&   r,   rE   rJ   r   r   r8   r7   r   r   r   r   r   
   s    

1

 

?

'


}

!
r   )loggingrk   typingr   r   r   r   r   sglang.srt.managers.io_structr   	getLoggerru   r#   r   r   r   r   r   <module>   s    
