o
    iʅ                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZmZ d dlm	Z	 d dlm
Z
 d dlmZ d dlmZmZ d dlmZ d dlmZ d d	lmZmZmZmZmZ d d
lmZ d dlmZ d dlmZ d dl m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z'm(Z(m)Z) d dl*m+Z+ d dl,m-Z-m.Z.m/Z/ d dl0m1Z1 d dl2m3Z3 d dl4m5Z5 d dl6m7Z7 d dl8m9Z9 d dl:m;Z; erd dl<m=Z= e>e?Z@ddddZAdeBdeBfd d!ZCG d"d# d#ZDG d$d% d%eZEd&ed'ejFdeGe fd(d)ZHG d*d+ d+eEZIG d,d- d-eEZJG d.d/ d/eEZKG d0d1 d1eKZLdS )2    N)ABCabstractmethod)Callable)AsyncExitStackreplace)TYPE_CHECKINGUnion)"ResponseFunctionToolCallOutputItem)Mcp)AuthorMessageRoleStreamStateTextContent)envs)ChatTemplateContentFormatOption)
MCP_PREFIX)Tool)
ToolServer)FunctionCall)get_encoding#get_streamable_parser_for_assistantrender_for_completion)'get_responses_parser_for_simple_context)ResponseInputOutputItemResponseRawMessageAndTokenResponsesRequest)construct_tool_dicts)RequestOutput)ReasoningParser)TokenizerLike)
ToolParser)random_uuid)ClientSessionweb_search_previewcode_interpreter	container)browserpythonr'   	tool_namereturnc                 C   s2   | t vrdt  }td|  d| t |  S )Nz, zBuilt-in tool name 'z+' not defined in mapping. Available tools: )_TOOL_NAME_TO_TYPE_MAPjoinkeys
ValueError)r*   available_tools r1   _/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/entrypoints/openai/responses/context.py_map_tool_name_to_tool_typeA   s   r3   c                   @   sL   e Zd ZdZ				ddededededdf
d	d
ZdddZdddZdS )TurnMetricszATracks token and toolcall details for a single conversation turn.r   input_tokensoutput_tokenscached_input_tokenstool_output_tokensr+   Nc                 C   s   || _ || _|| _|| _d S Nr5   r6   r7   r8   )selfr5   r6   r7   r8   r1   r1   r2   __init__N   s   
zTurnMetrics.__init__c                 C   s   d| _ d| _d| _d| _dS )zReset counters for a new turn.r   Nr:   r;   r1   r1   r2   resetZ   s   
zTurnMetrics.resetc                 C   s   t | j| j| j| jS )z*Create a copy of this turn's token counts.)r4   r5   r6   r7   r8   r=   r1   r1   r2   copya   s   zTurnMetrics.copy)r   r   r   r   r+   N)r+   r4   )__name__
__module____qualname____doc__intr<   r>   r?   r1   r1   r1   r2   r4   K   s&    

r4   c                   @   s   e Zd ZededdfddZedddZedee fdd	Z	ede
fd
dZedee fddZededB dededeeef ddf
ddZedddZdS )ConversationContextoutputr+   Nc                 C      d S r9   r1   r;   rG   r1   r1   r2   append_outputl      z!ConversationContext.append_outputc                 C   rH   r9   r1   rI   r1   r1   r2   append_tool_outputp   rK   z&ConversationContext.append_tool_outputc                       d S r9   r1   r=   r1   r1   r2   	call_toolt   s   zConversationContext.call_toolc                 C   rH   r9   r1   r=   r1   r1   r2   need_builtin_tool_callx   rK   z*ConversationContext.need_builtin_tool_callc                 C   rH   r9   r1   r=   r1   r1   r2   r   |   rK   z)ConversationContext.render_for_completiontool_server
exit_stack
request_id	mcp_toolsc                    rM   r9   r1   r;   rP   rQ   rR   rS   r1   r1   r2   init_tool_sessions   s   z&ConversationContext.init_tool_sessionsc                    
   t dNzShould not be called.NotImplementedErrorr=   r1   r1   r2   cleanup_session   s   z#ConversationContext.cleanup_sessionr@   )rA   rB   rC   r   r   rJ   rL   listr   rN   boolrO   rE   r   r   r   strdictr   rU   rZ   r1   r1   r1   r2   rF   k   s2    
	rF   last_msgec                 C   sB   dt | d}t|d}ttj| jd}t||gtj| jdgS )z:
    Creates an error message when json parse failed.
    z&Error parsing tool arguments as JSON: zE. Please ensure the tool call arguments are valid JSON and try again.textrolenameauthorcontent	recipientchannel)	r]   r   r   r   TOOLri   r   	ASSISTANTrj   )r_   r`   	error_msgrh   rg   r1   r1   r2   !_create_json_parse_error_messages   s   
rn   c                
   @   s   e Zd ZdZdd ZdddZededB fdd	Zdd
dZ	de
fddZdee fddZdee fddZdedB dededeeef ddf
ddZdddZdS )SimpleContextz3This is a context that cannot handle MCP tool callsc                 C   sF   d | _ d| _g | _g | _d| _d| _d| _d| _g | _g | _	g | _
d S N r   )last_output_accumulated_text_accumulated_token_ids_accumulated_logprobsnum_prompt_tokensnum_output_tokensnum_cached_tokensnum_reasoning_tokensall_turn_metricsinput_messagesoutput_messagesr=   r1   r1   r2   r<      s   
zSimpleContext.__init__r+   Nc                 C   s   || _ t|tstdt|jpg | _|jpd| _|  jt|j	d j
p%g 7  _|j	d }|  j|j7  _| j|j
 |jd urI| j|j t| jdkrd|jpTd}|jpYg }| jt||d | jt|j|j
d d S )Nz*SimpleContext only supports RequestOutput.r   rq   messagetokens)rr   
isinstancer   r/   lenprompt_token_idsrv   rx   rw   outputs	token_idsrs   rb   rt   extendlogprobsru   r{   promptappendr   r|   )r;   rG   delta_outputoutput_promptoutput_prompt_token_idsr1   r1   r2   rJ      s4   




zSimpleContext.append_outputc                 C   s   | j dur=| j jr=t| j tsJ t| j }dd | j jD |_| j|jd _t| j|jd _	| j
r;| j
|jd _|S | j S )z?Return the final output, with complete text/token_ids/logprobs.Nc                 S   s   g | ]}t |qS r1   r   ).0itemr1   r1   r2   
<listcomp>   s    z.SimpleContext.final_output.<locals>.<listcomp>r   )rr   r   r   r   r?   rs   rb   tuplert   r   ru   r   )r;   final_outputr1   r1   r2   r      s   zSimpleContext.final_outputc                 C      t drW   rX   rI   r1   r1   r2   rL         z SimpleContext.append_tool_outputc                 C   s   dS )NFr1   r=   r1   r1   r2   rO      s   z$SimpleContext.need_builtin_tool_callc                    rV   rW   rX   r=   r1   r1   r2   rN         zSimpleContext.call_toolc                 C   r   rW   rX   r=   r1   r1   r2   r      r   z#SimpleContext.render_for_completionrP   rQ   rR   rS   c                    rM   r9   r1   rT   r1   r1   r2   rU      s   z SimpleContext.init_tool_sessionsc                    rV   rW   rX   r=   r1   r1   r2   rZ      r   zSimpleContext.cleanup_sessionr@   )rA   rB   rC   rD   r<   rJ   propertyr   r   rL   r\   rO   r[   r   rN   rE   r   r   r   r]   r^   r   rU   rZ   r1   r1   r1   r2   ro      s,    



	ro   c                   @   sH  e Zd Zdee dedeegef dB dedee	 dB deege
f dB de	dB d	efd
dZdeddfddZdee ddfddZdefddZdedef dedee fddZdedef dedee fddZdedef dedee fddZdee fddZdd  Zd!edB d"ed#e	d$ee	ef fd%d&Zd)d'd(Z dS )*ParsableContextresponse_messages	tokenizerreasoning_parser_clsNrequestr0   tool_parser_clschat_templatechat_template_content_formatc          	      C   s   d| _ d| _d| _d| _g | _|d u rtdt|||||d| _|| _|| _	|p*g | _
i | _t | _t|j|j| _|| _|| _g | _g | _d S )Nr   z&reasoning_parser_cls must be provided.)r   r   r   r   r   )rv   rw   rx   ry   rz   r/   r   parserr   r   r0   _tool_sessionssetcalled_toolsr   toolstool_choice
tool_dictsr   r   r{   r|   )	r;   r   r   r   r   r0   r   r   r   r1   r1   r2   r<     s0   

zParsableContext.__init__rG   r+   c                 C   s   t |jpg | _|jpd| _|  jt |jd jpg 7  _| j|jd  | j	j
rd|jp.d}|jp3g }t | jdkrF| jt||d n
| jt||d | jt|jd j|jd jd d S d S )Nr   rq   r}   )r   r   rv   rx   rw   r   r   r   processr   enable_response_messagesr   r{   r   r   r|   rb   )r;   rG   r   r   r1   r1   r2   rJ   /  s6   



zParsableContext.append_outputc                 C   s   | j j| d S r9   )r   r   r   rI   r1   r1   r2   rL   N  s   z"ParsableContext.append_tool_outputc                 C   s4   | j jd }|jdkr|jdv s|jdrdS dS )z2Return true if the last message is a MCP tool callfunction_call)r&   r)   r%   r'   TF)r   r   typere   
startswith)r;   last_messager1   r1   r2   rO   Q  s   


z&ParsableContext.need_builtin_tool_calltool_sessionr$   r_   c                    s   | j d t|tr|| I d H S t|j}d|d i}|d|I d H }|j	d j
}tdt  ddt  |dd}|gS )	Nr)   coder   mcpo_function_call_outputcall_	completedidr   call_idrG   status)r   addr   r   get_result_parsable_contextjsonloads	argumentsrN   rh   rb   r
   r#   )r;   r   r_   argsparamresult
result_strr~   r1   r1   r2   call_python_tool_  s"   


z ParsableContext.call_python_toolc              
      s   | j d t|tr|| I d H S tjr8zt|j	}W n tj
y7 } zt||W  Y d }~S d }~ww t|j	}|d|I d H }|jd j}tdt  ddt  |dd}|gS )	Nr(   searchr   fco_r   r   r   r   r   r   r   r   r   r   $VLLM_TOOL_JSON_ERROR_AUTOMATIC_RETRYr   r   r   JSONDecodeErrorrn   rN   rh   rb   r
   r#   r;   r   r_   r   r`   r   r   r~   r1   r1   r2   call_search_toolv  s,   


z ParsableContext.call_search_toolc              
      s   | j d t|tr|| I dH S tjr8zt|j	}W n tj
y7 } zt||W  Y d}~S d}~ww t|j	}|d|I dH }|jd j}tdt  ddt  |dd	}|gS )
  
        Call container tool. Expect this to be run in a stateful docker
        with command line terminal.
        The official container tool would at least
        expect the following format:
        - for tool name: exec
            - args:
                {
                    "cmd":List[str] "command to execute",
                    "workdir":optional[str] "current working directory",
                    "env":optional[object/dict] "environment variables",
                    "session_name":optional[str] "session name",
                    "timeout":optional[int] "timeout in seconds",
                    "user":optional[str] "user name",
                }
        r'   Nexecr   r   r   r   r   r   r   r   r1   r1   r2   call_container_tool  s,   


z#ParsableContext.call_container_toolc                    s   | j jsg S | j jd }t t  |_|| j jd< |jdkr,| | jd |I d H S |jdkr=| | jd |I d H S |j	drO| 
| jd |I d H S g S )Nr   r&   r)   r%   r(   r'   )r   r   r   r#   r   re   r   r   r   r   r   )r;   r_   r1   r1   r2   rN     s   



zParsableContext.call_toolc                 C   r   rW   rX   r=   r1   r1   r2   r     r   z%ParsableContext.render_for_completionrP   rQ   rR   rS   c           	         st   |r6| j D ]1}|| jv rqt|}||v r|| jnd }|||||I d H }|| j|< || j qd S d S r9   r0   r   r3   headersenter_async_contextnew_sessionpush_async_exitrZ   	r;   rP   rQ   rR   rS   r*   	tool_typer   r   r1   r1   r2   rU     s   



z"ParsableContext.init_tool_sessionsc                    0   dd  t j fddjD  I dH  dS )(Can be used as coro to used in __aexit__c                    `   t | ts.td| j tt | di I d H  W d    d S 1 s'w   Y  d S d S NzCleaning up tool session for %srZ   	r   r   loggerinfo_client_info
contextlibsuppress	ExceptionrN   r   r1   r1   r2   cleanup_tool_session     
"z=ParsableContext.cleanup_session.<locals>.cleanup_tool_sessionc                 3       | ]
} j | V  qd S r9   r   r   toolr   r;   r1   r2   	<genexpr>  
    
z2ParsableContext.cleanup_session.<locals>.<genexpr>Nasynciogatherr   r;   r   kwargsr1   r   r2   rZ        zParsableContext.cleanup_sessionr@   )!rA   rB   rC   r[   r   r!   r   r    r   r]   r"   r   r<   r   rJ   rL   r\   rO   r	   r   r   r   r   r   r   rN   r   r   r   r^   r   rU   rZ   r1   r1   r1   r2   r     sl    
	

,





+

r   c                
   @   sP  e Zd Zdedee fddZdd Zdedd	fd
dZdee	 dd	fddZ
dedd	fddZdedefddZedefddZdefddZdee	 fddZdee fddZdedef de	dee	 fddZdedef de	dee	 fdd Zd!ed	B d"ed#ed$eeef fd%d&Zdedef de	dee	 fd'd(Zd+d)d*Zd	S ),HarmonyContextmessagesr0   c                 C   sn   || _ d | _|| _i | _t | _t | _t|| _	d| _
d| _d| _d| _d| _t | _g | _d| _d| _d S )Nr   T)	_messagesfinish_reasonr0   r   r   r   r   r   r   num_init_messagesrv   rw   rx   ry   num_tool_output_tokensr4   current_turn_metricsrz   is_first_turnfirst_tok_of_message)r;   r   r0   r1   r1   r2   r<     s    

zHarmonyContext.__init__c                 C   s"   | j jdv r|  jd7  _d S d S )N>   analysis
commentary   )r   current_channelry   r=   r1   r1   r2   _update_num_reasoning_tokens  s   z+HarmonyContext._update_num_reasoning_tokensrG   r+   Nc                 C   s   |j d j}t | _|D ]}| j| |   q| | | | | j	| j
  | j
  | jj}|j d j| _| j| d S )Nr   )r   r   r   r   r   r   _update_prefill_token_usage_update_decode_token_usagerz   r   r   r?   r>   r   r   r   r   )r;   rG   output_token_idstoken_idoutput_msgsr1   r1   r2   rJ     s   



zHarmonyContext.append_outputc                 C   s   |}| j | d S r9   )r   r   )r;   rG   r   r1   r1   r2   rL   ,  s   z!HarmonyContext.append_tool_outputc                 C   s   |j durt|j }nd}td || j_|  j|7  _| jr$d| _n-| jd }| jj|j |j	 }|dk rFtd|| jj|j|j	 d}|  j
|7  _
|| j_|j}|dure|  j|7  _|| j_dS dS )a  Update token usage statistics for the prefill phase of generation.

        The prefill phase processes the input prompt tokens. This method:
        1. Counts the prompt tokens for this turn
        2. Calculates tool output tokens for multi-turn conversations
        3. Updates cached token counts
        4. Tracks state for next turn calculations

        Tool output tokens are calculated as:
        current_prompt_tokens - last_turn_prompt_tokens -
        last_turn_output_tokens
        This represents tokens added between turns (typically tool responses).

        Args:
            output: The RequestOutput containing prompt token information
        Nr   z4RequestOutput appended contains no prompt_token_ids.Fr   zsNegative tool output tokens calculated: %d (current_input=%d, previous_input=%d, previous_output=%d). Setting to 0.)r   r   r   errorr   r5   rv   r   rz   r6   r   r8   rx   r7   )r;   rG   this_turn_input_tokensprevious_turnthis_turn_tool_tokensnum_cached_tokenr1   r1   r2   r   0  s>   


	z*HarmonyContext._update_prefill_token_usagec                 C   sF   d}|j r!|j D ]	}|t|j7 }q|  j|7  _| j j|7  _|S )ay  Update token usage statistics for the decode phase of generation.

        The decode phase processes the generated output tokens. This method:
        1. Counts output tokens from all completion outputs
        2. Updates the total output token count
        3. Tracks tokens generated in the current turn

        In streaming mode, this is called for each token generated.
        In non-streaming mode, this is called once with all output tokens.

        Args:
            output: The RequestOutput containing generated token information

        Returns:
            int: Number of output tokens processed in this call
        r   )r   r   r   rw   r   r6   )r;   rG   updated_output_token_countcompletion_outputr1   r1   r2   r   p  s   
z)HarmonyContext._update_decode_token_usagec                 C      | j S r9   r   r=   r1   r1   r2   r        zHarmonyContext.messagesc                 C   s6   | j d }|j}|d uo|dp|dp|dS )Nr   browser.r)   
container.)r   ri   r   r;   r_   ri   r1   r1   r2   rO     s   

z%HarmonyContext.need_builtin_tool_callc                    s   | j sg S | j d }|j}|d urE|dr#| | jd |I d H S |dr4| | jd |I d H S |drE| | jd |I d H S td)Nr   r  r(   r)   r	  r'   zNo tool call found)r   ri   r   r   r   r   r   r/   r
  r1   r1   r2   rN     s&   









zHarmonyContext.call_toolc                 C   s
   t | jS r9   )r   r   r=   r1   r1   r2   r     s   
z$HarmonyContext.render_for_completionr   r$   r_   c           
   
      s   | j d t|tr|| I d H S |jdd }tjrCzt	
|jd j}W n! t	jyB } zt||W  Y d }~S d }~ww t	
|jd j}|||I d H }|jd j}t|d}ttj|jd}	t|	|gtj|jdgS )Nr(   .r   r   ra   rc   rf   r   r   r   r   
get_resultri   splitr   r   r   r   rh   rb   r   rn   rN   r   r   r   rk   r   rl   rj   
r;   r   r_   r*   r   r`   r   r   rh   rg   r1   r1   r2   r     s0   

zHarmonyContext.call_search_toolc                    s   | j d t|tr|| I d H S d|jd ji}|d|I d H }|jd j}t|d}t	t
jdd}t||g|jt
jdgS )Nr)   r   r   ra   rc   )rg   rh   rj   ri   )r   r   r   r   r  rh   rb   rN   r   r   r   rk   r   rj   rl   )r;   r   r_   r   r   r   rh   rg   r1   r1   r2   r     s"   

zHarmonyContext.call_python_toolrP   rQ   rR   rS   c           	         sr   |r5| j D ]0}|| jvr4t|}||v r|| jnd }|||||I d H }|| j|< || j qd S d S r9   r   r   r1   r1   r2   rU     s   



z!HarmonyContext.init_tool_sessionsc           
   
      s   | j d t|tr|| I dH S |jdd dd }tjrHzt	
|jd j}W n! t	jyG } zt||W  Y d}~S d}~ww t	
|jd j}|||I dH }|jd j}t|d}ttj|jd}	t|	|gtj|jd	gS )
r   r'   Nr  r    r   ra   rc   rf   r  r  r1   r1   r2   r     s0   

z"HarmonyContext.call_container_toolc                    r   )r   c                    r   r   r   r   r1   r1   r2   r   #  r   z<HarmonyContext.cleanup_session.<locals>.cleanup_tool_sessionc                 3   r   r9   r   r   r   r1   r2   r   ,  r   z1HarmonyContext.cleanup_session.<locals>.<genexpr>Nr   r   r1   r   r2   rZ      r   zHarmonyContext.cleanup_sessionr@   )rA   rB   rC   r[   r]   r<   r   r   rJ   r   rL   r   rE   r   r   r   r\   rO   rN   r   r	   r   r   r   r   r   r^   r   rU   r   rZ   r1   r1   r1   r2   r     s^    
@	







+r   c                       s   e Zd Z fddZedefddZdeddfdd	Zdee	 ddfd
dZ
defddZdefddZdee f fddZ  ZS )StreamingHarmonyContextc                    s>   t  j|i | d | _t | _t | _d | _d| _d | _	d S )NT)
superr<   rr   r   r   r   encodinglast_tokr   last_content_deltar   	__class__r1   r2   r<   4  s   
z StreamingHarmonyContext.__init__r+   c                 C   r  r9   r  r=   r1   r1   r2   r   >  r  z StreamingHarmonyContext.messagesrG   Nc                 C   s   d | _ | jr| | |j| _d}|jd jD ]}| j| || jj p%d7 }q|r-|| _ | | |jrC| j	
| j  | j  |   || _t| j| j t| jjk rl| j| jjt| j| j d   d S d S rp   )r  r   r   finishedr   r   r   r   r   rz   r   r   r?   r>   r   r  r   r   r   r   r   )r;   rG   last_delta_texttokr1   r1   r2   rJ   B  s*   


z%StreamingHarmonyContext.append_outputc                 C   sj   t |dks
J d|d }|jjtjkr|jd u rd|_| j|}|D ]}| j	| q%|d | _
d S )Nr   z&Tool output should be a single messager   	assistantr   )r   rg   rd   r   rk   ri   r  renderr   r   r  )r;   rG   msgtoksr  r1   r1   r2   rL   a  s   z*StreamingHarmonyContext.append_tool_outputc                 C   s   | j jtjkS r9   )r   stater   EXPECT_STARTr=   r1   r1   r2   is_expecting_starto  s   z*StreamingHarmonyContext.is_expecting_startc                 C   s   | j | j v S r9   )r  r  !stop_tokens_for_assistant_actionsr=   r1   r1   r2   is_assistant_action_turnr  s   z0StreamingHarmonyContext.is_assistant_action_turnc                    sb   t   }d}g }|| | jkr"|||  |d8 }|| | jkst|D ]}| j| q&|S )Nr   r   )r  r   r  r   reversedr   r   )r;   rendered_tokenslast_n
to_processr  r  r1   r2   r   u  s   
z-StreamingHarmonyContext.render_for_completion)rA   rB   rC   r<   r   r[   r   r   rJ   r   rL   r\   r!  r#  rE   r   __classcell__r1   r1   r  r2   r  3  s    
r  )Mr   r   r?   r   loggingabcr   r   collections.abcr   r   dataclassesr   typingr   r	   >openai.types.responses.response_function_tool_call_output_itemr
   openai.types.responses.toolr   openai_harmonyr   r   r   r   r   vllmr   vllm.entrypoints.chat_utilsr   vllm.entrypoints.constantsr   vllm.entrypoints.mcp.toolr    vllm.entrypoints.mcp.tool_serverr   'vllm.entrypoints.openai.engine.protocolr   ,vllm.entrypoints.openai.parser.harmony_utilsr   r   r   /vllm.entrypoints.openai.parser.responses_parserr   *vllm.entrypoints.openai.responses.protocolr   r   r   'vllm.entrypoints.openai.responses.utilsr   vllm.outputsr   $vllm.reasoning.abs_reasoning_parsersr    vllm.tokenizersr!   &vllm.tool_parsers.abstract_tool_parserr"   
vllm.utilsr#   
mcp.clientr$   	getLoggerrA   r   r,   r]   r3   r4   rF   r   r[   rn   ro   r   r   r  r1   r1   r1   r2   <module>   sh   

 $
] w  =