o
    
۾i;                     @   sl   d dl mZ d dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ eeZG dd	 d	eZdS )
    )SequenceN)PreTrainedTokenizerBase)ChatCompletionRequest)DeltaMessage)init_logger)ReasoningParserc                       s   e Zd ZdZdef fddZdededeedB edB f fd	d
Z	dededede
e de
e de
e dedB fddZdedefddZdedefddZdededefddZdedededefddZdedededededefdd ZdedeedB edB edB f fd!d"Z  ZS )#GraniteReasoningParserz
    Reasoning parser for IBM Granite.

    IBM granite models currently use "Here is my thought process:"
    and "Here is my response:" to separate its thinking / response outputs.
    	tokenizerc                    s   t  j|g|R i | d| _d| _t| j d| j dtj| _ddg| _ddg| _	d	| _
d
| _tdd | jD | _d S )Nz&(?:Here's|Here is) my thought process:z(?:Here's|Here is) my response:z(.*?)z(.*)zHere's my thought process:zHere is my thought process:zHere's my response:zHere is my response::Herec                 s   s    | ]}t |V  qd S N)len.0think_start r   [/home/ubuntu/.local/lib/python3.10/site-packages/vllm/reasoning/granite_reasoning_parser.py	<genexpr>3   s    
z2GraniteReasoningParser.__init__.<locals>.<genexpr>)super__init__think_start_exprresponse_start_exprrecompileDOTALLreasoning_regexvalid_think_startsvalid_response_startsseq_boundary_endseq_boundary_startmaxlongest_think_start)selfr	   argskwargs	__class__r   r   r      s   
zGraniteReasoningParser.__init__model_outputrequestreturnNc                 C   s8   | j |}|sd|fS |d \}}|s|dfS ||fS )a  Extract the reasoning content & content sections, respectively.
        If the sequence doesn't match what we expect, i.e., the model generates
        something else, all content is considered non-reasoning content.

        Args:
            model_output (str): Output of the model to be parsed.
            request (ChatCompletionRequest): Request being processed.

        Returns:
            tuple[Optional[str], Optional[str]]: Tuple pair containing the
            reasoning content and non-reasoning content.
        Nr   )r   findall)r"   r'   r(   re_match	reasoningresponse_contentr   r   r   extract_reasoning7   s   z(GraniteReasoningParser.extract_reasoningprevious_textcurrent_text
delta_textprevious_token_idscurrent_token_idsdelta_token_idsc                 C   sh   |  |\}}}	|s| ||}
n|	s| |||}
n|dus!J | |||	||}
|
js2|
js2dS |
S )a  Extract the reasoning content / content emitted by granite models;
        If the sequence doesn't match what we expect, i.e., the model generates
        something else, all content is considered non-reasoning content.

        NOTE: Granite models do not use a special token to start their reasoning
        and response sections; instead they have token sequences, e.g.,

                Here is my thought process: Foo Here is my response: Bar

        This increases the complexity of correctly handling streams, since we
        need to watch for specific sequences and correctly parse them without
        dropping content that is potentially overlapping & spanning multiple
        delta messages.

        Args:
            previous_text (str): Previous text outside of this delta message.
            current_text (str): Previous text + delta text.
            delta_text (str): Text to consider and parse content from.
            previous_token_ids (Sequence[int]): Token IDs of previous_text.
            current_token_ids (Sequence[int]): Token IDs of current_text.
            delta_token_ids (Sequence[int]): Token IDs of delta_text.

        Returns:
            Union[DeltaMessage, None]
                DeltaMessage with either reasoning content or content, or None.
        N)_get_content_sections+_get_delta_message_with_no_reasoning_bounds*_get_delta_message_with_no_response_bounds#_get_delta_message_with_both_boundscontentr,   )r"   r/   r0   r1   r2   r3   r4   r,   resp_seq_lenr9   delta_messager   r   r   extract_reasoning_streamingN   s    #
z2GraniteReasoningParser.extract_reasoning_streamingtextc                       t  fdd| jD S )zCheck if a text matches one of the possible start reasoning seqs.

        Args:
            text (str): Text to check for leading substr.

        Returns:
            bool: True if any of the possible reasoning start seqs match.
        c                 3       | ]}|  V  qd S r   
startswithr   r=   r   r   r      s    

zDGraniteReasoningParser._is_reasoning_start_substr.<locals>.<genexpr>)anyr   r"   r=   r   rB   r   _is_reasoning_start_substr   s   	z1GraniteReasoningParser._is_reasoning_start_substrc                    r>   )zCheck if a text matches one of the possible start response seqs.

        Args:
            text (str): Text to check for leading substr.

        Returns:
            bool: True if any of the possible response start seqs match.
        c                 3   r?   r   r@   r   response_startrB   r   r   r      
    
zCGraniteReasoningParser._is_response_start_substr.<locals>.<genexpr>)rC   r   rD   r   rB   r   _is_response_start_substr   s   	z0GraniteReasoningParser._is_response_start_substrc                 C   s\   t |t | }| |}| |d| }|r |s td|dS |r(tdddS td|dS )a^  Parse the delta message when the current text has not yet completed
        its start of reasoning sequence.

        Args:
            current_text (str): The full previous + delta text.
            delta_text (str): Text to consider and parse content from.

        Returns:
            DeltaMessage: Message containing the parsed content.
        Nr,   r9   )r   rE   r   )r"   r0   r1   prev_longest_length	is_substr
was_substrr   r   r   r6      s   
zBGraniteReasoningParser._get_delta_message_with_no_reasoning_boundsr,   c                    s0  t  fdd| jD }|du s|rtdddS |dt|  }| || j}|| j}|dkr<| ||d nd}|dkrK|  |d nd}	|dkrZ| ||d nd}
|	rdtdddS |sx|
rrt|d| ddS t|ddS |
r||d |d|  }t|ddS t||d | ddS )a5  Parse the delta message when the current text has both reasoning
        content with no (response) content. NOTE that we may have overlapping
        tokens with the start of reasoning / start of response sequences on
        either side of the delta text.

        Args:
            current_text (str): The full previous + delta text.
            reasoning (str): reasoning content from current_text.
            delta_text (str): Text to consider and parse content from.

        Returns:
            DeltaMessage: Message containing the parsed content.
        c                 3   s    | ]}  |V  qd S r   )endswithrF   r0   r   r   r      rH   zTGraniteReasoningParser._get_delta_message_with_no_response_bounds.<locals>.<genexpr>NrJ   r   F)rC   r   r   r   rfindr   rI   )r"   r0   r,   r1   ends_with_start_response_seqr/   prev_idx	delta_idxprev_was_substrdelta_continues_substrdelta_new_substrr   rO   r   r7      sD   zAGraniteReasoningParser._get_delta_message_with_no_response_boundsr-   response_seq_lenc                 C   s   |t | d }t |t ||  }|dk rd}n$t || t | d }	t |t | }
|	|
 }|dk r8d}||| }t||dS )a  Parse the delta message when the current text has both reasoning
        content and normal (response) content.

        Args:
            delta_text: Text to consider and parse content from.
            reasoning: reasoning content from current_text.
            response_content: response content from current_text.
            current_text: The full previous + delta text.
            response_seq_len: Len of the complete response sequence used.

        Returns:
            DeltaMessage: Message containing the parsed content.
        Nr      rJ   )r   r   )r"   r1   r,   r-   r0   rW   delta_contentreasoning_end_idxdelta_reasoningstart_reasoning_idxdelta_offsetstart_offsetr   r   r   r8     s   z:GraniteReasoningParser._get_delta_message_with_both_boundsc                    s   d}d}d} fddt |D }|D ]]}||| }|du r8 jD ]}||dd kr6|d }|d } nq"q|sp jD ]2}	|t|	 d d |	dd kro|t|	 }
|||
 }||d d }|t|	|f    S q=q|r~|s~||d ddfS dS )	a  Parse the text to extract the reasoning content / content
        if we have them.

        Args:
            current_text (str): The full previous + delta text.

        Returns:
            tuple[Optional[str], Optional[int], Optional[str]]: Tuple of len 3
            containing the reasoning content, the length of the response seq
            (if there is one) and the non-reasoning content.
        r   NFc                    s   g | ]\}}| j kr|qS r   )r   )r   idxcharr"   r   r   
<listcomp>Q  s
    
z@GraniteReasoningParser._get_content_sections.<locals>.<listcomp>rX   )NNN)	enumerater   r   r   )r"   r0   current_chunk_startstart_reasoningparsed_contentdelimiter_idxscurrent_chunk_endcurrent_chunkr   rG   end_reasoningr,   r-   r   ra   r   r5   @  s8   


"z,GraniteReasoningParser._get_content_sections)__name__
__module____qualname____doc__r   r   strr   tupler.   r   intr   r<   boolrE   rI   r6   r7   r8   r5   __classcell__r   r   r%   r   r      sz    

<
"
N
+r   )collections.abcr   regexr   transformersr   0vllm.entrypoints.openai.chat_completion.protocolr   'vllm.entrypoints.openai.engine.protocolr   vllm.loggerr   vllm.reasoningr   rl   loggerr   r   r   r   r   <module>   s   