o
    
۾i}h                     @   s   d dl mZ d dlZd dlmZ d dlmZmZm	Z	m
Z
mZmZ d dlmZ d dlmZ d dlmZ eeZG dd	 d	eZdS )
    )SequenceN)ChatCompletionRequest)DeltaFunctionCallDeltaMessageDeltaToolCallExtractedToolCallInformationFunctionCallToolCall)init_logger)TokenizerLike)
ToolParserc                       s   e Zd Zdef fddZdedeeeef fddZdd	d
Z	dddZ
dededefddZdedededee dee dee dededB fddZ  ZS )KimiK2ToolParser	tokenizerc                    s:  t  | d _g  _d _g  _d _d _d _d _	d _
d _d _d _dd	g _dd
g _d _d _tdtj _td _td _ jsUtd j j _ j j _ fdd jD  _ fdd jD  _ j j _  j j _! jd u s jd u rt"dd S )NF i   r   i    z<|tool_calls_section_begin|>z<|tool_calls_section_end|>z<|tool_call_section_begin|>z<|tool_call_section_end|>z<|tool_call_begin|>z<|tool_call_end|>z<\|tool_call_begin\|>\s*(?P<tool_call_id>[^<]+:\d+)\s*<\|tool_call_argument_begin\|>\s*(?P<function_arguments>(?:(?!<\|tool_call_begin\|>).)*?)\s*<\|tool_call_end\|>zV(?P<tool_call_id>.+:\d+)\s*<\|tool_call_argument_begin\|>\s*(?P<function_arguments>.*)z(?P<tool_call_id>.+:\d+)\s*zUThe model tokenizer must be passed to the ToolParser constructor during construction.c                    $   g | ]} j | d urqS Nvocabget.0variantselftid Y/home/ubuntu/.local/lib/python3.10/site-packages/vllm/tool_parsers/kimi_k2_tool_parser.py
<listcomp>T   
    z-KimiK2ToolParser.__init__.<locals>.<listcomp>c                    r   r   r   r   r   r   r   r   Y   r   zQKimi-K2 Tool parser could not locate tool call start/end tokens in the tokenizer!)#super__init__current_tool_name_sentprev_tool_call_arrcurrent_tool_idstreamed_args_for_toolin_tool_sectiontoken_bufferbuffer_max_sizesection_char_countmax_section_chars_buffer_overflow_loggedtool_calls_start_tokentool_calls_end_tokentool_calls_start_token_variantstool_calls_end_token_variantstool_call_start_tokentool_call_end_tokenrecompileDOTALLtool_call_regexstream_tool_call_portion_regexstream_tool_call_name_regexmodel_tokenizer
ValueErrorr   r   tool_calls_start_token_idtool_calls_end_token_idtool_calls_start_token_idstool_calls_end_token_idstool_call_start_token_idtool_call_end_token_idRuntimeError)r   r   	__class__r   r   r!      sd   

zKimiK2ToolParser.__init__textreturnc                 C   s^   d}d}|}| j D ]}||v r||d}d}q	| jD ]}||v r)||d}d}q|||fS )z
        Check for section begin/end markers in text and strip them.
        Returns: (cleaned_text, found_section_begin, found_section_end)
        Fr   T)r.   replacer/   )r   rC   found_begin	found_endcleanedr   r   r   r   _check_and_strip_markersk   s   


z)KimiK2ToolParser._check_and_strip_markersNc                 C   s   d| _ d| _d| _dS )z&Reset state when exiting tool section.Fr   r   N)r&   r'   r)   r   r   r   r   _reset_section_state   s   
z%KimiK2ToolParser._reset_section_statec                 C   s.   |    d| _g | _d| _g | _td dS )z
        Reset all streaming state. Call this between requests to prevent
        state leakage when parser instance is reused.
        Fr   zStreaming state resetN)rK   r"   r#   r$   r%   loggerdebugrJ   r   r   r   reset_streaming_state   s   z&KimiK2ToolParser.reset_streaming_statemodel_outputrequestc           
   
   C   s   | j |vrtdg |dS zJ| j|}td| g }|D ] }|\}}|dd dd }|t|dt	||d	d
 q|d |
| j  }	td||	rR|	dW S d dW S  tyk   td tdg |d Y S w )NF)tools_called
tool_callscontentzfunction_call_tuples: %s:r   .r   function)name	arguments)idtyperV   Tz,Error in extracting tool call from response.)r,   r   r5   findallrL   rM   splitappendr	   r   find	Exception	exception)
r   rO   rP   function_call_tuplesrR   matchfunction_idfunction_argsfunction_namerS   r   r   r   extract_tool_calls   sD   



z#KimiK2ToolParser.extract_tool_callsprevious_textcurrent_text
delta_textprevious_token_idscurrent_token_idsdelta_token_idsc           (         sl  t d| t d| d}|  j|7  _t| j| jkr6| js*t d| j d| _| j| j d d  | _| | j\}	}
}|
rR| jsRt d d| _|	| _d| _	|r| jrt d	 | j
|v }|rnd}t d
 |	| _n8t d |   d}| jD ]}||v r||d}t|dkr|d } nq|| rt|dS tddS |	| _t fdd| jD }|s| jst d t|dS | |\}}}| jr|  j	t|7  _	| j	| jkrt d| j |   t| r|dS ddS z1|| j}|| j
} | j} | j
}d }d }||krA||krA| j|vrA| jr6|dkr6t d| tddW S t d t|dW S | j|v rtt d || }|| jd | jd  }|| jd  }|| jd  }||kr||krt|dkr|| jd }nd }d }d }|  jd7  _d| _| jd t d| j n||kr||kr|| jd }d }n||kr[||kr[| jd u st| jdkrt d |r| jr|   W d S | j| j d}|rZ|tu r| d!dn|}d|vr|r| jr|   W d S |"d}|d | d }t d| | j| j  |7  < |rG| jrGt d |   tt#| jt$|dj%dd d!gd"W S n8| jrqt d# |rk|   tddW S |&| jd}|&| jd}tg |d$}|r| jr|   |W S t' }|r| j()|}|r|* \}} |d%d d&d }!| |d'< |!|d(< | |d< n1| j+)|}"|"r|"* \}#|#d%d d&d }!|# |d'< |!|d(< d|d< nt d) W d S | js(|d u r W d S |d(}$|d'}|$r%d| _tt#| jd*|t$|$d+j%dd d,gd"W S W d S |d u rF| jr4W d S |d urAt|d}|W S d }|W S t d-| j t| j| jkr\| ji  | j| j d}%|d}&t d.|% t d/|& |&s|%st d0| d }nv|&s|%rt ,d1 d }nh|&r|%stt#| jt$|&dj%dd d!gd"}|&| j| j< nI|&r|%rt-|tr|&|%krt|&t|%kr|&.|%r|&t|%d  }'t d2| tt#| jt$|'dj%dd d!gd"}|&| j| j< nd }| jt| jd kr|| j| j< n| j| |r#| jr#t d |   |W S  t/y5   t 0d3 Y d S w )4Nzdelta_text: %szdelta_token_ids: %sFzxToken buffer exceeded max size (%d bytes), flushing excess. This may indicate very long markers or unusual tokenization.T   zEntering tool sectionr   zDetected section end markerz3Deferring section exit: tool_call_end in same chunkzExiting tool sectionr      )rS   c                 3   s    | ]}| v V  qd S r   r   )r   r   rk   r   r   	<genexpr>  s    
z@KimiK2ToolParser.extract_tool_calls_streaming.<locals>.<genexpr>zNo tool call tokens found!zdTool section exceeded max length (%d chars), forcing exit. This may indicate malformed model output.z2In tool section before first tool, suppressing: %sz/Generating text content! skipping tool parsing.z!tool_call_end_token in delta_textr   zStarting on a new tool %sz/attempting to close tool call, but no tool callrX   zutf-8unicode_escapez"}z@Finishing tool and found diff that had not been streamed yet: %sz Completing deferred section exit)rX   )exclude_none)indexrV   )rR   z,In tool section, suppressing text generation)rR   rS   rT   rU   rY   rW   zNot enough tokenrV   )rW   )rs   rZ   rY   rV   z,Trying to parse current tool call with ID %szdiffing old arguments: %szagainst new ones: %szSkipping text %s - no argumentszSshould be impossible to have arguments reset mid-call. skipping streaming anything.zgot diff %sz+Error trying to handle streaming tool call.)1rL   rM   r'   lenr(   r+   warningrI   r&   r)   r?   rK   r/   r\   stripr   anyr<   r*   countr>   r1   r0   rstriplstripr$   r"   r%   r]   r#   r   strencodedecoderindexr   r   
model_dumprE   dictr6   rb   groupsr7   error
isinstance
startswithr_   r`   )(r   rg   rh   ri   rj   rk   rl   rP   deferred_section_exitbuffered_textfound_section_beginfound_section_endhas_tool_endpost_section_contentr   partshas_section_token_prev_tool_start_countprev_tool_end_countcur_tool_start_countcur_tool_end_counttool_call_portiontext_portion	full_textdeltadiffend_locrC   current_tool_callcurrent_tool_call_matchestool_id	tool_args	tool_namecurrent_tool_call_name_matchestool_id_strre   prev_argumentscur_argumentsdelta_argumentsr   ro   r   extract_tool_calls_streaming   s  
































%
















z-KimiK2ToolParser.extract_tool_calls_streaming)rD   N)__name__
__module____qualname__r   r!   r{   tupleboolrI   rK   rN   r   r   rf   r   intr   r   __classcell__r   r   rA   r   r      s:    M


1	r   )collections.abcr   regexr2   0vllm.entrypoints.openai.chat_completion.protocolr   'vllm.entrypoints.openai.engine.protocolr   r   r   r   r   r	   vllm.loggerr
   vllm.tokenizersr   &vllm.tool_parsers.abstract_tool_parserr   r   rL   r   r   r   r   r   <module>   s    