o
    io                     @   s   d dl Z d dlmZ d dlmZ d dlZd dlmZ d dl	m
Z
 d dlmZmZmZmZmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZ eeZG dd deZdS )    N)Sequence)Any)make_tool_call_id)ChatCompletionRequest)DeltaFunctionCallDeltaMessageDeltaToolCallExtractedToolCallInformationFunctionCallToolCall)init_logger)TokenizerLike)
ToolParser)extract_intermediate_diffc                       sd  e Zd Zdef fddZdedefddZdedefd	d
ZdedefddZdede	de
fddZdeddfddZdedefddZdedefddZdedeeef fddZdedefddZdKddZdKd d!Zd"eddfd#d$Zdefd%d&Zd'edefd(d)Zd'eddfd*d+Zdedefd,d-Zdedeeeef  fd.d/Zd0ed1eje defd2d3Zded4edeedB edB f fd5d6Z d0ed'ede!dB fd7d8Z"d0ed'ede!dB fd9d:Z#d;edefd<d=Z$d>ed;eded?e%e d@e%e dAe%e de	de!dB fdBdCZ&d;ededB fdDdEZ'd;ededFededB fdGdHZ(d;edFedefdIdJZ)  Z*S )LMinimaxToolParser	tokenizerc                    s   t  | dg g d| _d| _d| _tdtj| _d| _	td| _
td| _d	| _d
| _| js7td| j| j| _| j| j| _| jd u sQ| jd u rXtd d S d S )Ncurrent_tool_indextool_ids
sent_toolsz<tool_calls>z</tool_calls>z/<tool_calls>(.*?)</tool_calls>|<tool_calls>(.*)<think>(.*?)</think>z"name":\s*"([^"]+)"z"arguments":\s* FzUThe model tokenizer must be passed to the ToolParser constructor during construction.zrMinimax Tool parser could not locate tool call start/end tokens in the tokenizer. Falling back to string matching.)super__init__streaming_statetool_call_start_tokentool_call_end_tokenrecompileDOTALLtool_call_regexthinking_tag_patterntool_name_patterntool_args_patternpending_bufferin_thinking_tagmodel_tokenizer
ValueErrorvocabgettool_call_start_token_idtool_call_end_token_idloggerwarning)selfr   	__class__ [/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/tool_parsers/minimax_tool_parser.pyr   !   s4   zMinimaxToolParser.__init__model_outputreturnc                 C   s   dd }t j| j||t jdS )z
        Preprocess model output by removing tool calls from thinking tags.

        Args:
            model_output: Raw model output string

        Returns:
            Preprocessed model output with tool calls removed from thinking tags
        c                 S   s*   |  d}tjdd|tjd}d| dS )N   z<tool_calls>.*?</tool_calls>r   flags<think></think>)groupr   subr    )matchthink_contentcleaned_contentr2   r2   r3   remove_tool_calls_from_thinkT   s
   

zOMinimaxToolParser.preprocess_model_output.<locals>.remove_tool_calls_from_thinkr7   )r   r<   r"   r    )r/   r4   r@   r2   r2   r3   preprocess_model_outputI   s   z)MinimaxToolParser.preprocess_model_output	args_textc                 C   s   |  }|s|S zt| |W S  tjy   Y nw |dr@|dd }zt| |W S  tjy:   |}Y nw |ds |S )z
        Clean duplicate closing braces from arguments text.

        Args:
            args_text: Raw arguments text

        Returns:
            Cleaned arguments text with proper JSON formatting
        z}}Nr   )stripjsonloadsJSONDecodeErrorendswith)r/   rB   	candidater2   r2   r3   _clean_duplicate_bracesb   s&   




z)MinimaxToolParser._clean_duplicate_braces
delta_textc                 C   sN   |s|S |  }|r%tdd |D r%|d}|dkr%|dr#dS dS |S )z
        Clean delta text by removing excessive closing braces.

        Args:
            delta_text: Delta text to clean

        Returns:
            Cleaned delta text
        c                 s   s    | ]}|d v V  qdS )z}
	 Nr2   ).0cr2   r2   r3   	<genexpr>   s    z8MinimaxToolParser._clean_delta_braces.<locals>.<genexpr>}r6   
z}
)rC   allcountrG   )r/   rJ   delta_strippedbrace_countr2   r2   r3   _clean_delta_braces   s   

z%MinimaxToolParser._clean_delta_bracesrequestc                 C   s  |  |}| j|vrtdg |dS z| j|}g }|D ]E}|d r&|d n|d }| ra| d}|D ])}	|	 }	|	r`|	dr`|	dr`zt	
|	}
||
 W q7 t	jy_   Y q7w q7qg }|D ] }d|v rd	|v r|td
t|d t	j|d	 dddd qf|| j}|dkr|d|  }|r|d}t|D ]}	|	 }	|	r||	}|dkr|d|t|	  } nqd}nd}n|}tt|dk|| r| dW S ddW S  ty   td tdg |d Y S w )a  
        Extract tool calls from model output for non-streaming mode.

        Args:
            model_output: Complete model output
            request: Chat completion request

        Returns:
            ExtractedToolCallInformation containing tool calls and content
        F)tools_called
tool_callscontentr   r6   rO   {rN   name	argumentsfunction)ensure_ascii)rZ   r[   )typer\   r   Nr   z9An unexpected error occurred during tool call extraction.)rA   r   r	   r!   findallrC   split
startswithrG   rD   rE   appendrF   r   r
   dumpsfindreversedlen	Exceptionr-   	exception)r/   r4   rU   processed_outputfunction_call_tuplesraw_function_callsr=   tool_call_contentlineslineparsed_callrW   function_callprocessed_posprocessed_contentposrX   r2   r2   r3   extract_tool_calls   s   






z$MinimaxToolParser.extract_tool_callstextNc                 C   s4   | d}| d}||kp||ko|d| _dS )z
        Update the thinking tag state based on text content.

        Args:
            text: Text to analyze for thinking tags
        r9   r:   N)rQ   rG   r&   )r/   ru   
open_countclose_countr2   r2   r3   _update_thinking_state   s
   

z(MinimaxToolParser._update_thinking_statec              	      sL   | j | jfD ] t fddtdttd t D r# dS qdS )z
        Check if text might be the start of a tool call tag.

        Args:
            text: Text to check

        Returns:
            True if text could be the start of a tool call tag
        c                 3   s$    | ]}  | d  V  qd S N)ra   )rK   itagru   r2   r3   rM     s
    
z<MinimaxToolParser._is_potential_tag_start.<locals>.<genexpr>r6   TF)r   r   anyrangeminrf   )r/   ru   r2   r{   r3   _is_potential_tag_start   s   
z)MinimaxToolParser._is_potential_tag_startc                 C   s2   | j rdS t| jp| j|v p| j|v p|dS )z
        Determine if content should be buffered for later processing.

        Args:
            delta_text: Delta text to check

        Returns:
            True if content should be buffered
        F<)r&   boolr%   r   r   ra   )r/   rJ   r2   r2   r3   _should_buffer_content  s   
z(MinimaxToolParser._should_buffer_contentc                 C   s   | j r|dfS | j| jfD ]4}tdt|D ]*}|d| }||}|dkr@|||d r@|d| ||d f    S qq|dfS )z
        Split delta text into safe content and potential tag content.

        Args:
            delta_text: Delta text to split

        Returns:
            Tuple of (safe_content, potential_tag_content)
        r   r6   Nr   )r&   r   r   r~   rf   rfindra   )r/   rJ   r|   rz   
tag_prefixrs   r2   r2   r3   _split_content_for_buffering   s   

 z.MinimaxToolParser._split_content_for_bufferingnew_contentc                 C   s   |  j |7  _ d}| jr| j }d| _ |S | j rq| j | j}| j | j}|dkr:|dks1||k r:|t| j}}n!|dkrG|t| j}}n| | j rP	 |S || j 7 }d| _ 	 |S || j d| 7 }| j || d | _ | j s|S )z
        Process buffered content and return output content.

        Args:
            new_content: New content to add to buffer

        Returns:
            Processed output content
        r   r   N)r%   r&   rd   r   r   rf   r   )r/   r   output_content	start_posend_postag_postag_lenr2   r2   r3   _process_buffer5  s0   

z!MinimaxToolParser._process_bufferc                 C   s   dg g d| _ dS )z,Reset the streaming state to initial values.r   r   Nr   r/   r2   r2   r3   _reset_streaming_state[  s   z(MinimaxToolParser._reset_streaming_statec                 C   s   t | jd d | jd< dS )z3Advance to the next tool in the streaming sequence.r   r6   Nintr   r   r2   r2   r3   _advance_to_next_toolc  s   z'MinimaxToolParser._advance_to_next_toolindexc                 C   s   || j d< dS )za
        Set the current tool index.

        Args:
            index: Tool index to set
        r   Nr   )r/   r   r2   r2   r3   _set_current_tool_indexi     z)MinimaxToolParser._set_current_tool_indexc                 C   s   t | jd S )z^
        Get the current tool index.

        Returns:
            Current tool index
        r   r   r   r2   r2   r3   _get_current_tool_indexr  r   z)MinimaxToolParser._get_current_tool_index
tool_countc                 C   sH   t | jd }t|D ]}|t|k r|| d s|  S q|  S dS )z
        Get the index of the next unsent tool.

        Args:
            tool_count: Total number of tools

        Returns:
            Index of next unsent tool, or -1 if all tools sent
        r   	sent_namer   )listr   r~   rf   )r/   r   r   rz   r2   r2   r3   _get_next_unsent_tool_index{  s   
z-MinimaxToolParser._get_next_unsent_tool_indexc                 C   s   t | jd }t | jd }t||k r$|ddt d t||k st||k r5|d t||k s*|| jd< || jd< dS )z
        Ensure state arrays have sufficient capacity for tool_count tools.

        Args:
            tool_count: Number of tools to prepare for
        r   r   Fr   )r   sent_argumentsidN)r   r   rf   rb   r   )r/   r   r   r   r2   r2   r3   _ensure_state_arrays  s   	

z&MinimaxToolParser._ensure_state_arraysc                 C   s   | j |}t|S )z
        Detect the number of tools in text by counting name patterns.

        Args:
            text: Text to analyze

        Returns:
            Number of tools detected
        )r#   r_   rf   )r/   ru   matchesr2   r2   r3   _detect_tools_in_text  s   
z'MinimaxToolParser._detect_tools_in_textc           
      C   s"  g }d}|t |k r|| dkr|}d}d}d}|t |k rw|| dkr)|d7 }n(|| dkrQ|d8 }|dkrQ|d }||| }	d|	v rPd|	v rP|||f n&|s_d|||d  v r_d}|smd|||d  v rmd}|d7 }|t |k s|dkr|r|||f n|d7 }|t |k s
|S )	z
        Find the boundaries of tool calls in text.

        Args:
            text: Text to analyze

        Returns:
            List of (start, end) positions for tool calls
        r   rY   Fr6   rN   z"name"z"arguments"T)rf   rb   )
r/   ru   
boundariesrz   startdepthhas_namehas_argumentsendsegmentr2   r2   r3   _find_tool_boundaries  s>   

z'MinimaxToolParser._find_tool_boundariestool_content
args_matchc           	      C   s   |  }||d }| dr<d}t|D ]#\}}|dkr$|d7 }q|dkr:|d8 }|dkr:|d|d    S qn|d}|dkrM|d|  S |d S )z
        Extract tool arguments from tool content.

        Args:
            tool_content: Tool call content
            args_match: Regex match for arguments pattern

        Returns:
            Extracted arguments as string
        NrY   r   r6   rN   )r   rC   ra   	enumeraterd   rstrip)	r/   r   r   args_start_posremaining_contentr   rz   charargs_endr2   r2   r3   _extract_tool_args  s"   

z$MinimaxToolParser._extract_tool_args
tool_indexc                 C   s   |  |}|t|krdS || \}}||| }| j|}|r&|dnd}| j|}	|	rXz| ||	}
||
fW S  tyW   ||	 d }|	d
 }
||
f Y S w |dfS )a  
        Get the content of a specific tool by index.

        Args:
            text: Text containing tool calls
            tool_index: Index of tool to extract

        Returns:
            Tuple of (tool_name, tool_arguments) or (None, None) if not found
        )NNr6   NrN   )r   rf   r#   searchr;   r$   r   rg   r   r   rC   )r/   ru   r   r   r   r   r   
name_matchrZ   r   rB   r   r2   r2   r3   _get_current_tool_content  s$   

z+MinimaxToolParser._get_current_tool_contentc           
      C   s   |  |}|dkrdS | |}|t|krdS | ||\}}|s$dS | | t| jd }t| jd }|| d }	|	||< d|| d< || jd< || jd< tt|d|	t	|d	j
dd
dgdS )z
        Handle streaming of tool names.

        Args:
            tool_content: Content containing tool calls
            tool_count: Total number of tools

        Returns:
            DeltaMessage with tool name or None if no tool to stream
        r   Nr   r   r   Tr   r\   )rZ   exclude_none)r   r^   r   r\   rW   )r   r   rf   r   r   r   r   r   r   r   
model_dump)
r/   r   r   next_idxr   	tool_name_r   r   tool_idr2   r2   r3   _handle_tool_name_streaming"  s8   





z-MinimaxToolParser._handle_tool_name_streamingc                 C   sL  |   }|dk s||krdS | ||\}}|r|du rdS t| jd }|| d s-dS | |}|| d }||kr|rv||rvt||}	|	rt| |	}	||| d< || jd< |drc| 	  t
t|t|	djdd	d
gdS dS |s|r| |}
||| d< || jd< |dr| 	  t
t|t|
djdd	d
gdS dS )a  
        Handle streaming of tool arguments.

        Args:
            tool_content: Content containing tool calls
            tool_count: Total number of tools

        Returns:
            DeltaMessage with tool arguments or None if no arguments to stream
        r   Nr   r   r   rN   )r[   Tr   )r   r\   r   )r   r   r   r   rI   ra   r   rT   rG   r   r   r   r   r   )r/   r   r   current_idxr   	tool_argsr   
clean_args	sent_args
args_deltaclean_args_deltar2   r2   r3   _handle_tool_args_streamingT  sd   







z-MinimaxToolParser._handle_tool_args_streamingcurrent_textc                    s   | j |vrdS g }d}	 || j |  dkrn
|   d }qg }tj| j|tjdD ]}|| | f q.|D ] t	 fdd|D }|sP dS q>dS )	NFr   Tr   r6   r7   c                 3   $    | ]\}} |ko |k V  qd S ry   r2   rK   t_startt_endrs   r2   r3   rM         
z7MinimaxToolParser._is_end_tool_calls.<locals>.<genexpr>)
r   rd   rb   r   finditerr"   r    r   r   r}   )r/   r   end_token_positionssearch_startthink_regionsr=   in_thinkr2   r   r3   _is_end_tool_calls  s0   



z$MinimaxToolParser._is_end_tool_callsprevious_textprevious_token_idscurrent_token_idsdelta_token_idsc                 C   s  |  | | jrt|dS | |r | |}|rt|dS d S | |r*t|dS | |\}	}
|
rC|  j|
7  _|	rAt|	dS d S | |}| j	|vrz| j
|v rY| j	|v rYd S | dkrf| j	|v rfd S |  dkru| j
|v ru|   t|dS | jd ur| j|v rt|dkrd S | |}|d u rd S | |||}|rt|dS z.| ||}| |}|dkrW d S |  dkr|   | | | ||p| ||W S  ty   tdd Y d S w )N)rX   r   r   r6   r   zAn unexpected error occurred z$during streaming tool call handling.)rx   r&   r   r   r   r   r   r%   rA   r   r   rC   r   r   r+   rf   !_find_tool_start_outside_thinking_extract_content_before_tools_extract_tool_contentr   r   r   r   rg   r-   rh   )r/   r   r   rJ   r   r   r   rU   buffered_outputsafe_contentpotential_tagprocessed_current_textoriginal_tool_startcontent_before_toolsr   current_tools_countr2   r2   r3   extract_tool_calls_streaming  sl   



















z.MinimaxToolParser.extract_tool_calls_streamingc                    sd   d}	 | | j|  dkrdS dd tjd|tjdD }t fd	d
|D }|s- S  d }q)z
        Find the start position of tool calls outside of thinking tags.

        Args:
            current_text: Current text to search

        Returns:
            Position of tool call start or None if not found
        r   Tr   Nc                 S   s   g | ]
}|  | fqS r2   )r   r   )rK   mr2   r2   r3   
<listcomp>  s    zGMinimaxToolParser._find_tool_start_outside_thinking.<locals>.<listcomp>r   r7   c                 3   r   ry   r2   r   r   r2   r3   rM   !  r   zFMinimaxToolParser._find_tool_start_outside_thinking.<locals>.<genexpr>r6   )rd   r   r   r   r    r}   )r/   r   r   r   r   r2   r   r3   r     s"   
z3MinimaxToolParser._find_tool_start_outside_thinking
tool_startc                 C   sT   |dkr(t |t | }||k r(|}|t | |kr"|d||  }|r&|S dS dS )a  
        Extract content that appears before tool calls.

        Args:
            current_text: Current text
            delta_text: Delta text
            tool_start: Start position of tools

        Returns:
            Content before tools or None
        r   N)rf   )r/   r   rJ   r   delta_start_poscontent_partr2   r2   r3   r   *  s   z/MinimaxToolParser._extract_content_before_toolsc                 C   s>   |t | j }||d }|| j}|dkr|d| }|S )z
        Extract tool content from current text starting at tool_start.

        Args:
            current_text: Current text
            tool_start: Start position of tool calls

        Returns:
            Extracted tool content
        Nr   )rf   r   rd   r   )r/   r   r   tool_content_startr   r   r2   r2   r3   r   A  s   z'MinimaxToolParser._extract_tool_content)r5   N)+__name__
__module____qualname__r   r   strrA   rI   rT   r   r	   rt   rx   r   r   r   tupler   r   r   r   r   r   r   r   r   r   r   r   r   Matchr   r   r   r   r   r   r   r   r   r   r   __classcell__r2   r2   r0   r3   r       s    (
X
&
		,
$
2
H	
S
r   )rD   collections.abcr   typingr   regexr   vllm.entrypoints.chat_utilsr   0vllm.entrypoints.openai.chat_completion.protocolr   'vllm.entrypoints.openai.engine.protocolr   r   r   r	   r
   r   vllm.loggerr   vllm.tokenizersr   &vllm.tool_parsers.abstract_tool_parserr   vllm.tool_parsers.utilsr   r   r-   r   r2   r2   r2   r3   <module>   s    