o
    is+                  	   @   s   d dl Zd dlZd dlmZ d dlmZ d dlZer d dl	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ eeZG d
d dejZejddG dd dZdededeedB edB f fddZejG dd dZG dd deZdS )    N)Sequence)TYPE_CHECKING)TokenizerLike)ChatCompletionRequest)DeltaMessage)ResponsesRequest)init_logger)ReasoningParserc                   @   s   e Zd ZdZdZdS )Olmo3ReasoningState      N)__name__
__module____qualname__	REASONINGCONTENT r   r   [/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/reasoning/olmo3_reasoning_parser.pyr
      s    r
   T)frozenc                   @   s&   e Zd ZU eed< eed< dd ZdS )Indicesstartendc                 C   s   | j | j S N)r   r   selfr   r   r   __len__&   s   zIndices.__len__N)r   r   r   int__annotations__r   r   r   r   r   r   !   s   
 r   abreturnc                 C   sV  t | t |k r| |dfn|| df\} }}| |v r9tdt | }t|| || t |  }|r5||fS ||fS tt | d ddD ]-}| | d |d| krptt | | t | }td|}|rj||f  S ||f  S qCtt | d ddD ]-}|| d | d| krtd|}tt || t |}|r||f  S ||f  S q{dS )a.  
    Find the longest overlap where the end of string a matches the start
    of string b.

    Args:
        a: First string
        b: Second string

    Returns:
        Tuple of IndicesTuples representing the overlapping portions in each
        string, or a tuple of None if no overlap exists
    FTr   r   N)NN)lenr   indexrange)r   r   swapind_aind_bir   r   r   string_overlap*   s$   *

r)   c                   @   sp   e Zd ZU dZeed< dZeed< dZeed< ej	Z
eed< ded	B fd
dZdd Zdeded	B fddZd	S )Olmo3ReasoningBuffer<think>think_start</think>	think_end bufferstater    Nc                 C   s  | j | j}|dkr-tj| _| j d | | j |t| j d  }| _ |dkr-t|dS | j | j	}|dkrZtj
| _| j d | | j |t| j	 d  }| _ |dkrZt|dS | jtjkrm| j d	}| _ t|dS | jtj
kr| j d	}| _ t|dS d S )Nr   )content)	reasoningr/   )r0   findr,   r
   r   r1   r"   r   rfindr.   r   )r   start_think_idxpretextend_think_idxtext_bufferr   r   r   process_buffer`   s:   



z#Olmo3ReasoningBuffer.process_bufferc                 C   s
   t | jS r   )r"   r0   r   r   r   r   r      s   
zOlmo3ReasoningBuffer.__len__
delta_textc                 C   s   |  j |7  _ d }t|| j\}}t|| j\}}|d uo%t|t| jk }|d uo2t|t| jk }|rC| j| j v rC|sC|  }|S |rQ| j| j v rQ|  }|S |sU|rWd S |  }|S r   )r0   r)   r,   r.   r"   r:   )r   r;   delta_message_overlap_think_startoverlap_think_endpartial_overlap_startpartial_overlap_endr   r   r   add_text   s8   

zOlmo3ReasoningBuffer.add_text)r   r   r   r,   strr   r.   r0   r
   r   r1   r   r:   r   rB   r   r   r   r   r*   T   s   
 0r*   c                       s   e Zd ZdZd fddZdee defdd	Zde	e de	e fd
dZ
dedeeB deedB edB f fddZdedededee dee dee dedB fddZ  ZS )Olmo3ReasoningParsera  
    Reasoning parser for Olmo 3 model

    Olmo3ReasoningParser

    This class implements a reasoning parser specifically designed for the
    Olmo 3 family of models. Olmo 3 models do not use special tokens to
    indicate reasoning; rather, reasoning trace is wrapped in `<think>` and
    `</think>`, which are tokenized using standard vocabulary entries.
    Because of this, the parser operates in string space, accumulating the
    characters in a buffer until it sees `<think>` or `</think>`. tokens
    to switch modes.

    Key Features:
        - For non-stream output, Recognizes and extracts reasoning (text
          bracketed by `<think>` and `</think>`) and content (everything
          after the first `</think>`).
        - For stream process, it uses a buffer to accumulate delta text,
          and output progressive delta messages as soon as thinking starts
          or ends.
        - For reliability, some Olmo 3 models may hardcode the first
          `<think>` token is the input text (similar to Deepseek R1,
          or reasoning-only Qwen models). To support such variants, the
          parser can optionally work in cases where the first `<think>`
          token is missing from generation.
    	tokenizerr   c                    sb   t  j|g|R i | d| _d| _d| j d| j d}t|tj| _t| j| jd| _	d S )Nr+   r-   z^(?:z)?(?P<reasoning>.*?)z(?P<content>.*)$)r,   r.   )
super__init__r,   r.   recompileDOTALLreasoning_regexr*   r0   )r   rE   argskwargsreasoning_expr	__class__r   r   rG      s   
zOlmo3ReasoningParser.__init__	input_idsr    c                 C   s   | j |}| j|v S r   )model_tokenizerdecoder.   )r   rQ   textr   r   r   is_reasoning_end   s   
z%Olmo3ReasoningParser.is_reasoning_endc                 C   s   g S r   r   )r   rQ   r   r   r   extract_content_ids   s   z(Olmo3ReasoningParser.extract_content_idsmodel_outputrequestNc                 C   s<   | j |}|r|dpd}|dpd}||fS d|fS )a1  Extract the reasoning content & content sections, respectively.
        If the sequence doesn't match what we expect, i.e., the model generates
        something else, all content is considered non-reasoning content.

        Args:
            model_output (str): Output of the model to be parsed.
            request (ChatCompletionRequest | ResponsesRequest): Request being
                processed.

        Returns:
            tuple[Optional[str], Optional[str]]: Tuple pair containing the
            reasoning content and non-reasoning content.
        r3   Nr2   )rK   matchgroup)r   rW   rX   re_matchr3   r2   r   r   r   extract_reasoning   s   z&Olmo3ReasoningParser.extract_reasoningprevious_textcurrent_textr;   previous_token_idscurrent_token_idsdelta_token_idsc                 C   s2   | j |}|du r| j j| j j v r| j  }|S )z5Extract content using token ID sequence state machineN)r0   rB   r.   r:   )r   r]   r^   r;   r_   r`   ra   r<   r   r   r   extract_reasoning_streaming  s   
z0Olmo3ReasoningParser.extract_reasoning_streaming)rE   r   )r   r   r   __doc__rG   r   r   boolrU   listrV   rC   r   r   tupler\   r   rb   __classcell__r   r   rO   r   rD      s6    
rD   ) dataclassesdtenumcollections.abcr   typingr   regexrH   vllm.tokenizersr   0vllm.entrypoints.openai.chat_completion.protocolr   'vllm.entrypoints.openai.engine.protocolr   *vllm.entrypoints.openai.responses.protocolr   vllm.loggerr   vllm.reasoningr	   r   loggerEnumr
   	dataclassr   rC   rf   r)   r*   rD   r   r   r   r   <module>   s(   
&*q