o
    i                    @   s`  d dl Z d dlZd dlZd dlZd dlmZ d dlmZmZm	Z	m
Z
 d dlmZ d dlmZ d dlmZmZ d dlmZ d dlmZ d dlZd d	lmZ d d
lmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8 d dl9m:Z: d dl;m<Z<m=Z= d dl>m?Z@ d dlAmBZBmCZC d dlDmEZF d dlGmHZH d dlImJZJ d dlKmLZL d dlMmNZNmOZO d dlPmQZQ d dlRmSZS d dlTmUZUmVZVmWZW d dlXmYZYmZZZ d dl[m\Z\ d dl]m^Z^m_Z_m`Z`maZambZbmcZcmdZdmeZemfZfmgZg d dlhmiZimjZjmkZkmlZlmmZm d dlnmoZompZpmqZqmrZrmsZsmtZtmuZumvZvmwZwmxZxmyZymzZz d dl{m|Z|m}Z}m~Z~ d dlmZ d dlmZ d dlmZ d d lmZ d d!lm<Z d d"lmZ d d#lmZ d d$lmZ d d%lmZ d d&lmZmZ d d'lmZ d d(lmZ eeZeG d)d* d*Zd+eeC d,eeee dB f fd-d.ZG d/d0 d0eZZdS )1    N)deque)AsyncGeneratorAsyncIteratorCallableSequence)AsyncExitStack)copy)	dataclassreplace)
HTTPStatus)Final)Request) )ResponseCodeInterpreterCallCodeDeltaEvent(ResponseCodeInterpreterCallCodeDoneEvent)ResponseCodeInterpreterCallCompletedEvent*ResponseCodeInterpreterCallInProgressEvent,ResponseCodeInterpreterCallInterpretingEvent$ResponseCodeInterpreterToolCallParamResponseContentPartAddedEventResponseContentPartDoneEvent'ResponseFunctionCallArgumentsDeltaEvent&ResponseFunctionCallArgumentsDoneEventResponseFunctionToolCallResponseFunctionWebSearch"ResponseMcpCallArgumentsDeltaEvent!ResponseMcpCallArgumentsDoneEventResponseMcpCallCompletedEventResponseMcpCallInProgressEventResponseOutputItemResponseOutputItemAddedEventResponseOutputItemDoneEventResponseOutputMessageResponseOutputTextResponseReasoningItemResponseReasoningTextDeltaEventResponseReasoningTextDoneEventResponseStatusResponseTextDeltaEventResponseTextDoneEvent#ResponseWebSearchCallCompletedEvent$ResponseWebSearchCallInProgressEvent#ResponseWebSearchCallSearchingEventresponse_function_web_searchresponse_text_delta_event)McpCall)LogprobLogprobTopLogprob)Content)McpTool)Message)TypeAdapter)envs)EngineClient)ChatCompletionMessageParamChatTemplateContentFormatOption)RequestLogger)
ToolServer)DeltaMessageErrorResponseRequestResponseMetadata)GenerationErrorOpenAIServing)OpenAIServingModels)
)construct_harmony_previous_input_messagesget_developer_message%get_stop_tokens_for_assistant_actionsget_system_messageget_user_messagehas_custom_toolsparse_output_messageparse_remaining_stateparse_response_inputrender_for_completion)ConversationContextHarmonyContextParsableContextSimpleContextStreamingHarmonyContext)InputTokensDetailsOutputTokensDetailsResponseCompletedEventResponseCreatedEventResponseInProgressEventResponseInputOutputMessageResponseReasoningPartAddedEventResponseReasoningPartDoneEventResponsesRequestResponsesResponseResponseUsageStreamingResponsesResponse)construct_input_messagesconstruct_tool_dictsextract_tool_types)get_max_tokens)VLLMValidationError)TokensPrompt)init_logger)r/   )SampleLogprobs)CompletionOutput)ParserManager)	TokPrompt)SamplingParamsStructuredOutputsParams)TokenizerLike)random_uuidc                   @   sX   e Zd ZU dZdZeed< dZeed< dZe	ed< dZ
eed	< dZeed
< dddZdS )HarmonyStreamingStatez5Mutable state for harmony streaming event processing.current_content_indexr   current_output_index current_item_idFsent_output_item_addedis_first_function_call_deltareturnNc                 C   s   |  j d7  _ d| _d| _dS )z-Reset state when expecting a new output item.   FN)ro   rr   rs   self rx   _/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/entrypoints/openai/responses/serving.pyreset_for_new_item   s   
z(HarmonyStreamingState.reset_for_new_item)rt   N)__name__
__module____qualname____doc__rn   int__annotations__ro   rq   strrr   boolrs   rz   rx   rx   rx   ry   rl      s   
 rl   toolsrt   c                 C   sr   i }| D ]2}t |tsqd}|jdur't |jtr|j}n
t|jdr'|jj}|dur1d|v r1d}|||j< q|S )a  
    Extract allowed_tools mapping from MCP tool requests.

    Returns a dictionary mapping server_label to allowed_tools list.
    Handles both list format and McpAllowedToolsMcpToolFilter object format.

    Special handling:
    - If allowed_tools is None, returns None (allows all tools)
    - If allowed_tools contains "*", returns None (allows all tools)
    - Otherwise, returns the list of specific tool names

    This function can be reused for both harmony and non-harmony MCP calls.
    N
tool_names*)
isinstancer2   allowed_toolslisthasattrr   server_label)r   allowed_tools_maptoolallowed_tools_valrx   rx   ry   (_extract_allowed_tools_from_mcp_requests   s   

r   c                       s  e Zd ZU dddddddddd	dedededB dedB d	ed
ededededB de	dB dededededdf fddZ
dededB fddZdededB fddZ	ddededB deedf eB eB fddZdededB fd d!ZdededB fd"d#Zded$ed%efd&d'Z	dded(ed)ee d$ed*ed+ed,ed-edB deeB fd.d/Zd0edB defd1d2Z d3d4d5d6Z!e"e#eef  e$d7< d8e#ee%f d9ed+ede&e' fd:d;Z(	dd<e)e d8e*dB d+ed9edB de&e+ f
d=d>Z,	dd<e)e d8e*dB d+ed9edB de&e-j+ f
d?d@Z.dedAe/d+ede&e0 fdBdCZ1d$e2de&e0 fdDdEZ3dedB fdFdGZ4dedHedIe5e de6fdJdKZ7dededB de&e6 fdLdMZ8defdNdOZ9defdPdQZ:	ddRedSedB deedf fdTdUZ;dRedSedB dVedB deeB eedf B fdWdXZ<dRedeeB fdYdZZ=dRedefd[d\Z>defd]d^Z?ded(ed)eedB  d$ed*ed+ed,ed-ed_e@egef deedf fd`daZAdbeBde&e fdcddZCdbeBde&e fdedfZDdbeBde&e fdgdhZEdbeBde&e fdidjZFdbeBde&e fdkdlZGdmeHdbeBde&e fdndoZIdmeHdbeBde&e fdpdqZJdmeHdbeBd0ede&e fdrdsZKdmeHdbeBde&e fdtduZLdmeHdbeBde&e fdvdwZMdmeHdbeBde&e fdxdyZNdbeBde&e fdzd{ZOdbeBde&e fd|d}ZPdbeBde&e fd~dZQdbeBde&e fddZRdmeHdbeBde&e fddZSdmeHdbeBde&e fddZTded(ed)eedB  d$ed*ed+ed,ed-ed_e@egef deedf fddZU	dded(ed)eedB  d$ed*ed+ed,ed-edB deedf fddZV  ZWS )OpenAIServingResponsesFrp   N)	return_tokens_as_token_idsreasoning_parserenable_auto_toolstool_parsertool_serverenable_prompt_tokens_detailsenable_force_include_usageenable_log_outputslog_error_stackengine_clientmodelsrequest_loggerchat_templatechat_template_content_formatr   r   r   r   r   r   r   r   r   rt   c                   s*  t  j|||||d || _|| _|| _tj|	||| jjd| _	|| _
|| _| j | _tj| _| jr8td | jjjdk| _| jr[td d| jvrRg | jd< | jd t  t| jdd }| jjjdksut|try|d	dkryd| _nd
| _|| _i | _ t!" | _#i | _$i | _%i | _&|
| _'d S )N)r   r   r   r   r   )tool_parser_namereasoning_parser_namer   
model_namezz`VLLM_ENABLE_RESPONSES_API_STORE` is enabled. This may cause a memory leak since we never remove responses from the store.gpt_osszLFor gpt-oss, we ignore --enable-auto-tool-choice and always enable tool use.stop_token_idshf_overrideskimi_k2
model_typerandom)(super__init__r   r   r   rf   
get_parsermodel_configmodelparserr   r   get_diff_sampling_paramdefault_sampling_paramsr6   VLLM_ENABLE_RESPONSES_API_STOREenable_storeloggerwarning_once	hf_configr   use_harmonywarningextendrD   getattrhf_text_configr   dictgettool_call_id_typer   response_storeasyncioLockresponse_store_lock	msg_storeevent_storebackground_tasksr   )rw   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   	__class__rx   ry   r      sb   




zOpenAIServingResponses.__init__engine_promptc                 C   s@   |  |}| j|krd| d| j d}| jd|tjddS dS )z3Add validations to the input to the generator here.zThe engine prompt length z exceeds the max_model_len z. Please reduce prompt.invalid_request_errorinputerr_typemessagestatus_codeparamN)_extract_prompt_lenmax_model_lencreate_error_responser   BAD_REQUEST)rw   r   
prompt_lenerror_messagerx   rx   ry   _validate_generator_input%  s   

z0OpenAIServingResponses._validate_generator_inputrequestc                 C   sl   | j r| r| jddtjddS |jr$| js$|jr$| jddtjddS |jr4|j	r4| jddtjddS d S )	Nr   z.logprobs are not supported with gpt-oss modelslogprobsr   zThis vLLM engine does not support `store=True` and therefore does not support the background mode. To enable these features, set the environment variable `VLLM_ENABLE_RESPONSES_API_STORE=1` when launching the vLLM server.
backgroundzLOnly one of `previous_input_messages` and `previous_response_id` can be set.previous_response_id)
r   is_include_output_logprobsr   r   r   storer   r   previous_input_messagesr   )rw   r   rx   rx   ry    _validate_create_responses_input:  s,   z7OpenAIServingResponses._validate_create_responses_inputraw_requestc                     sz   |I d H }|d urtd| |S |}|d ur |S jjr(jj|jr1js1d|_|j	}|d urej
4 I d H  j|}W d   I d H  n1 I d H sVw   Y  |d u rd|S nd }z$|}j|}jr||\}	}
n||I d H \}	}
W n" ttttjtfy } ztd |W  Y d }~S d }~ww t|jd}|r||j_g }g }j d urj !dr|"d j !dr|"d j !dr|"d j d ur|}n
t#|dksJ g }zÈjj$}|% }|
D ]}&|}|d ur|  W S t'j(|j)*|j+}|,|j+}|-j.}|d u r3d n/|j0I d H }jrP|j1rJt2|	|}n,t3|	|}n&t4j5rst6|	|j7r_j7j8nd |j7rij7j9nd |j:j;d	}nt< }j7rj7j8d urj78|}t=|j> }t?r|@ rtA||B|jCj d
|_>jD|j||||||jE|d}|"| qW n ty } z|W  Y d }~S d }~ww t#|dksJ |\}|jr|	jF|j< |jGrotHtII }tJjK||||g dd d}j
4 I d H  |j|jL< W d   I d H  n1 I d H sw   Y  |j1r<tMjNO||||||||d|j d}ntMjNP||||||||d|jL d}|jL |jQ < |R fdd |j1rmS|jS |S |j1r~T|||||||S zU|||||||I d H W S  tVy } zW|W  Y d }~S d }~w tXy } z|W  Y d }~S d }~ww )NzError with model %sFz$Error in preprocessing prompt inputs)
request_idbrowserpython	containerr   )response_messages	tokenizerreasoning_parser_clsr   tool_parser_clsavailable_toolsr   r   )structural_tag)r   r   sampling_params
tok_paramscontextlora_requestprioritytrace_headersru   queuedr   created_timeoutputstatususagecreate_)namec                    s   j  d S N)r   pop)_response_idrw   rx   ry   <lambda>;  s    z9OpenAIServingResponses.create_responses.<locals>.<lambda>)Y_check_modelr   errorr   r   errored
dead_errorr   r   r   r   r   r   _make_not_found_error_maybe_get_adaptersr   r   r   _make_request_with_harmony_make_request
ValueError	TypeErrorRuntimeErrorjinja2TemplateErrorNotImplementedError	exceptionr   r>   r   staterequest_metadatar   has_toolappendlenrendererget_tokenizerr   r`   r   max_output_tokensr   r   to_sampling_paramsbuild_tok_paramsr   _get_trace_headersheadersstreamrP   rM   r6   $VLLM_USE_EXPERIMENTAL_PARSER_CONTEXTrN   r   r   r   r   r   rO   r   structured_outputsri   'all_non_structural_tag_constraints_noner
   prepare_structured_tagr   _generate_with_builtin_toolsr   r   r   r   timerZ   from_requestidr   create_task_run_background_request_stream_run_background_requestr   add_done_callback%responses_background_stream_generatorresponses_stream_generatorresponses_full_generatorr?   %_convert_generation_error_to_response	Exception) rw   r   r   error_check_retmaybe_validation_errorprev_response_idprev_responser   r   messagesengine_promptser  
generatorsbuiltin_tool_listr   r  r   r   maybe_errordefault_max_tokensr   r   r   r   r   
struct_out	generatorresult_generatorr   responsetaskrx   r   ry   create_responses[  s  	
(














L	*





	z'OpenAIServingResponses.create_responsesr+  c              	      s|   t |j|j}t|j|j|r| j|jnd |r|j	nd d}| j
||| j| jd || jr1| jjnd dI d H \}}||fS )N)request_instructionsrequest_inputprev_msgprev_response_output)default_templatedefault_template_content_formatdefault_template_kwargs
tool_dictsr   )r^   r   tool_choicer]   instructionsr   r   r   r  r   _preprocess_chatr   r   r   r   )rw   r   r+  r@  r,  r   r-  rx   rx   ry   r  \  s$   	z$OpenAIServingResponses._make_requestc                 C   sN   |j dkr	td| ||}t|}t|d}|jd ur"|j|d< ||gfS )NautozAOnly 'auto' tool_choice is supported in response API with Harmony)prompt_token_ids
cache_salt)rA  r  &_construct_input_messages_with_harmonyrK   rb   rF  )rw   r   r+  r,  rE  r   rx   rx   ry   r  u  s   




z1OpenAIServingResponses._make_request_with_harmonyr   
exit_stackc                    sB   t |jdkr
d S dd |jD }|| j||j|I d H  d S )Nr   c                 S   s   i | ]}|j d kr|j|qS )mcp)typer   ).0r   rx   rx   ry   
<dictcomp>  s    zDOpenAIServingResponses._initialize_tool_sessions.<locals>.<dictcomp>)r  r   init_tool_sessionsr   r   )rw   r   r   rH  	mcp_toolsrx   rx   ry   _initialize_tool_sessions  s   z0OpenAIServingResponses._initialize_tool_sessionsr   r5  r   r   r  r   c	                    sx  |d u rt t }t 4 I d H [}	z| |||	I d H  |2 z3 d H W }
q 6 W n9 tjyB   | d Y W  d   I d H  S  tyb } z| |W  Y d }~W  d   I d H  S d }~ww W d   I d H  n1 I d H ssw   Y  d}d }d }| jrt	|t
sJ | |}|jr|jd |j }|j|jd  }|j}t|dkr|jdkrd}n{|jdkrd}ns| |j|j njd}ngt	|tr|j }|jr|j}|j}d}|jjdkrd}nIt	|tsJ |j}|d usJ t|jdksJ |jd }| |j|j |jdkrd}| |||}|jr"|j}|j}|jd us*J d}t	|tt
tfs7J |j}|j}|j }|j!}t"|||| t#|d	d
 |j$D dd
 |j$D dt%||dd
 |j$D dd
 |j$D dd}t&j'|||||||||d	}|j(r| j)4 I d H $ | j*+|j,}|d u s|j-dkr|| j*|j,< W d   I d H  |S 1 I d H sw   Y  |S )NzClient disconnected	completedr   length
incompleteabort	cancelledru   c                 S      g | ]}|j qS rx   )input_tokensrK  turnrx   rx   ry   
<listcomp>      zCOpenAIServingResponses.responses_full_generator.<locals>.<listcomp>c                 S   rU  rx   )cached_input_tokensrW  rx   rx   ry   rY    rZ  )cached_tokensinput_tokens_per_turncached_tokens_per_turnc                 S   rU  rx   )output_tokensrW  rx   rx   ry   rY    rZ  c                 S   rU  rx   )tool_output_tokensrW  rx   rx   ry   rY    rZ  )reasoning_tokensr`  output_tokens_per_turntool_output_tokens_per_turn)rV  r_  total_tokensinput_tokens_detailsoutput_tokens_details)input_messagesoutput_messagesr   r   r   r   r   ).r   r  r   rO  r   CancelledErrorr   r  r   r   rM   (_make_response_output_items_with_harmonyenable_response_messagesr,  num_init_messagesnum_tool_output_tokensr  finish_reason_raise_if_errorr   rN   r   0make_response_output_items_from_parsable_contextrg  rh  rO   final_outputoutputs_make_response_output_itemsrE  num_prompt_tokensnum_output_tokensnum_cached_tokensnum_reasoning_tokensr[   rQ   all_turn_metricsrR   rZ   r  r   r   r   r   r  r   )rw   r   r   r5  r   r   r   r  r   rH  r   r.  r   rg  rh  r   rm  	final_resrq  rt  num_generated_tokensrv  rw  r   r6  stored_responserx   rx   ry   r%    s   (





	z/OpenAIServingResponses.responses_full_generator	recipientc                 C   s   |du rdS | d S )z
        Determine if a tool call is an MCP tool based on recipient prefix.

        - Tools starting with "functions." are function calls
        - Everything else is an MCP tool
        NF
functions.)
startswith)rw   r|  rx   rx   ry   _is_mcp_tool_by_namespace!  s   z0OpenAIServingResponses._is_mcp_tool_by_namespacecode_interpreterr   web_search_preview)r   r   r   _TOOL_NAME_TO_MCP_SERVER_LABELr   top_logprobsc           	      C   sn   g }t | D ],\}\}}||kr |S | j|||| jd}|t|t|jdt|j	dddd q|S )z8Returns the top-k logprobs from the logprobs dictionary.logprobtoken_idr   return_as_token_id    utf-8r
   errors)tokenr  bytes)
	enumerateitems_get_decoded_tokenr   r  r0   maxr  r   encode)	rw   r   r  r   outir  _logprobtextrx   rx   ry   _topk_logprobs5  s&   
z%OpenAIServingResponses._topk_logprobs	token_idsc                 C   s   |d usJ dt |t |ksJ dg }t|D ]6\}}|| }|| }	| j|	||| jd}
|t|
t|	jdt|
j	ddd|rK| j
|||dng d	 q|S )
Nzlogprobs must be providedz:token_ids and logprobs.token_ids must have the same lengthr  r  r  r
   r  )r  r   )r  r  r  r  )r  r  r  r   r  r/   r  r  r   r  r  )rw   r  r   r   r  r  r  r  r  token_logprobr  rx   rx   ry   _create_response_logprobsO  s6   
z0OpenAIServingResponses._create_response_logprobsc                 C   s    | j ||||d}dd |D S )Nr  r   r   r  c                 S   s,   g | ]}t j|j|jd d |jD dqS )c                 S   s   g | ]}t j|j|jd qS ))r  r  )r-   r0   r  r  )rK  tlrx   rx   ry   rY    s    zVOpenAIServingResponses._create_stream_response_logprobs.<locals>.<listcomp>.<listcomp>)r  r  r  )r-   r/   r  r  r  )rK  lgrx   rx   ry   rY    s    zKOpenAIServingResponses._create_stream_response_logprobs.<locals>.<listcomp>)r  )rw   r  r   r   r  lgsrx   rx   ry    _create_stream_response_logprobst  s   z7OpenAIServingResponses._create_stream_response_logprobsrq  c                 C   s   | j r| jr| jj|j|j|j|jddd d }| r+|jr+| j	|j|j||j
d}| jr@| |}|j|j|| j| j|dS tdt  |jrSt|jg d|dgng dd	d
dgS )NF)r   rr  output_token_idsrn  is_streamingdeltar  )model_outputr   r   r   r   msg_output_text)r  annotationsrJ  r   	assistantrP  r   )r  contentroler   rJ  )r   r   log_outputsr   r  r  rn  r   r   r  r  r   extract_response_outputsr   r   r!   rk   r"   )rw   r   rq  r   r   r   rx   rx   ry   rs    sV   



		z2OpenAIServingResponses._make_response_output_itemsc                 C   sH   g }|j }|j|d  D ]	}|t| qt|j}|r"|| |S r   )rl  r,  r   rH   rI   r   )rw   r   output_itemsrl  msg
last_itemsrx   rx   ry   rj    s   

z?OpenAIServingResponses._make_response_output_items_with_harmonyc                 C   sH   d }t |jts"|jD ]}t |tr!|ddkr!|d} |S q|S )Nr  systemr  )r   r   r   r   r   )rw   r   
system_msgresponse_msgrx   rx   ry   $_extract_system_message_from_request  s   

 z;OpenAIServingResponses._extract_system_message_from_requestwith_custom_tools
tool_typesc              	   C   s   |  |}|jr|jjnd }t|j}d|v r,| jd ur,| jdr,| jd|dnd }d|v rG| jd urG| jdrG| jd|dnd }d|v rb| jd urb| jdrb| jd|dnd }	t	|||||	|j
|d}
|
S )Nr  r   r  r   r   )model_identityreasoning_effortbrowser_descriptionpython_descriptioncontainer_descriptionrB  r  )r  	reasoningeffortr   r   r   r  get_tool_descriptionr   rE   rB  )rw   r   r  r  r  r  r   r  r  r  sys_msgrx   rx   ry   '_construct_harmony_system_input_message  sP   











	z>OpenAIServingResponses._construct_harmony_system_input_messagec                 C   s  g }|d u r1t |j}t|}| |||}|| |r*t|j|jd}|| |t|7 }nd| j|j	 }t
|dkr|d }	t|	tsHJ |	jdkrd}
tt
|d ddD ]}|| }t|tsfJ |jdkro|}
 nqY||
d d  }||
d d = |D ]}t|tsJ || q|| t|jtr|t|j |S |d urt|j}ng }|jD ]}t||}|jjdkr|| t|tr|| q|S )N)rB  r   r   rm   final   ru   r  )r_   r   rG   r  r  rC   rB  rB   r   r  r  r   OpenAIHarmonyMessagechannelranger   r   r   rF   r   r   rJ   authorr  r   )rw   r   r+  r,  r  r  r  dev_msg	prev_msgslast_msgprev_final_msg_idxr  
prev_msg_irecent_turn_msgsr  prev_outputsr  new_msgrx   rx   ry   rG    s^   










z=OpenAIServingResponses._construct_input_messages_with_harmonyc              
      sp  t  }t }||f| j|j< d }z\z!| j|g|R i |}|2 z3 d H W }|| |  q"6 W n4 tyK }	 z| 	|	}W Y d }	~	n"d }	~	w t
yh }	 ztd|j | |	}W Y d }	~	nd }	~	ww W |  n|  w |d urt|tr|j}
| j4 I d H   | j|
}|d usJ |jdvrd|_W d   I d H  d S 1 I d H sw   Y  d S d S d S Nz Background request failed for %s)rP  rT  failed)r   r   Eventr   r   r$  r  setr?   r&  r'  r   r	  r   r   r=   r   r   r   r   )rw   r   argskwargsevent_dequenew_event_signalr6  r4  eventr.  r   r{  rx   rx   ry   r   [  s<   


.z5OpenAIServingResponses._run_background_request_streamc              
      s
  z| j |g|R i |I d H }W n4 ty) } z| |}W Y d }~n"d }~w tyF } ztd|j | |}W Y d }~nd }~ww t|t	r|j}| j
4 I d H   | j|}|d uscJ |jdvrkd|_W d   I d H  d S 1 I d H s|w   Y  d S d S r  )r%  r?   r&  r'  r   r	  r   r   r   r=   r   r   r   r   )rw   r   r  r  r6  r.  r   r{  rx   rx   ry   r!  {  s(   "

.z.OpenAIServingResponses._run_background_requestr   starting_afterc                 C  s   || j vrtd| d|d| j | \}}|d u rdn|d }|}	 |  |t|k rI|| }|V  t|ddd	kr?d S |d7 }|t|k s.| I d H  q$)
NzUnknown response_id: r   )	parametervaluer   ru   TrJ  unknownresponse.completed)r   ra   clearr  r   wait)rw   r   r  r  r  start_indexcurrent_indexr  rx   rx   ry   r#    s*   
z<OpenAIServingResponses.responses_background_stream_generatorr  c              	      sn   | j 4 I d H  | j|}W d   I d H  n1 I d H sw   Y  |d u r-| |S |r5| ||S |S r   )r   r   r   r   r#  )rw   r   r  r  r6  rx   rx   ry   retrieve_responses  s   
(
z)OpenAIServingResponses.retrieve_responsesc              	      s   | j 4 I d H @ | j|}|d u r"| |W  d   I d H  S |j}|dvr;| jddddW  d   I d H  S d|_W d   I d H  n1 I d H sNw   Y  | j| }ry|  z|I d H  W |S  tj	yx   t
d| Y |S w |S )N)r   in_progressr   z%Cannot cancel a synchronous response.r   )r   r   r   rT  z$Background task for %s was cancelled)r   r   r   r   r   r   r   cancelr   ri  r   r	  )rw   r   r6  prev_statusr7  rx   rx   ry   cancel_responses  s4   (z'OpenAIServingResponses.cancel_responsesc                 C   s   | j dd| dtjddS )Nr   zResponse with id 'z' not found.r   r   )r   r   	NOT_FOUND)rw   r   rx   rx   ry   r     s   
z,OpenAIServingResponses._make_not_found_errorc                 C   s   | j ddtjddS )Nr   z`store=True` (default) is not supported. Please set `store=False` in Responses API or set `VLLM_ENABLE_RESPONSES_API_STORE=1` in the env var when starting the vLLM server.r   r   )r   r   r   rv   rx   rx   ry   _make_store_not_supported_error  s   z6OpenAIServingResponses._make_store_not_supported_error%_increment_sequence_number_and_returnc
                 C  s  d}
d}d}d }| j r| j jr| j |}d}g }d}g }|2 zS3 d H W }t|ts.J |jd u r4q |jjrt|jjd }| |j|j |r]|j	|||j
 |j
|||j |jd}nt|j
d}||j
7 }||j7 }|spq |stt }|jr|	tdd|td|g d	d
dV  n|	tdd|t|ddg d	ddV  |	tdd|||
tddg g ddV  |
d7 }
d}|r0|d jd ur0|jd ur0ddd |D }|	td|d||
|dV  d}
tdt|ddgd|g d}|	tdd||dV  |	tdd|t|ddg d	ddV  |d7 }tt }|	tdd|||
tddg g ddV  |
d7 }
g }|jd urE|	tdd|
|||jdV  n&|jd urk|	td d|
|||j| re| j|j|j||j d!ng d"V  |
d7 }
|!| q 6 |r|d jd urdd#d |D }|	td|d||
|dV  |
d7 }
tdt|ddgd|g d}|	tdd||dV  d S |d jd urdd$d |D }|	t"d%d||
|g |d&V  |
d7 }
t|dg d'}|	t#d(d|||
|d)V  |
d7 }
tdd|gd|g d*}|	tdd||dV  d S d S d S )+Nr   rp   F)previous_textcurrent_text
delta_textprevious_token_idscurrent_token_idsdelta_token_idsr  response.output_item.addedrm   r  r  rJ  r  summaryr   rJ  sequence_numberoutput_indexitemr   r  r  rJ  r  r  r   response.content_part.addedr  rJ  r  r  r   rJ  r  r  item_idcontent_indexpartru   Tc                 s        | ]}|j d ur|j V  qd S r   r  rK  pmrx   rx   ry   	<genexpr>d      
zJOpenAIServingResponses._process_simple_streaming_events.<locals>.<genexpr>response.reasoning_text.donerJ  r  r  r  r  r  reasoning_textr  rJ  rP  rJ  r  r   r  r  response.output_item.doneresponse.reasoning_text.delta)rJ  r  r  r  r  r  response.output_text.deltar  rJ  r  r  r  r  r  r   c                 s   r  r   r  r  rx   rx   ry   r     r  c                 s   r  r   r  r  rx   rx   ry   r     r  response.output_text.donerJ  r  r  r  r  r   r  )r  rJ  r  response.content_part.donerJ  r  r  r  r  r  )rJ  r  r  r   r  r  )$r   r   r   rO   last_outputrr  ro  rn  r   extract_reasoning_streamingr  r  r<   r   uuiduuid4r  r   r#   r!   r   r"   r  joinr%   ResponseReasoningTextContentr    r$   r'   r   r  r   r  r  r(   r   )rw   r   r   r5  r   r   r   r  r   r  rn   ro   rq   r   r  r  first_delta_sentprevious_delta_messagesctxr   delta_messagereason_contentreasoning_itemfinal_contentr  r  rx   rx   ry    _process_simple_streaming_events  s"  

	





  ?






&z7OpenAIServingResponses._process_simple_streaming_eventsr
  c              
   C   s   |j tdd }g }|td|jd j||j|jdd td|jd j||j|jddt	  d	d
}|t
dd|j|d |S )z+Emit events when a function call completes.r}  Nz%response.function_call_arguments.doner   rm   rJ  	argumentsr   r  r  r  function_callfc_rP  )rJ  r  r   r  r  r  call_idr   r  r  )r|  r  r  r   r  r  rq   ro   r   rk   r    )rw   previous_itemr
  function_nameeventsfunction_call_itemrx   rx   ry   _emit_function_call_done_events)  s>   




z6OpenAIServingResponses._emit_function_call_done_eventsc                 C   s   | j |j|j}g }|td|jd j|j|j|jdd |t	dd|j|jd |t
dd|jtd|jd j|j|j|d	d
d |S )z,Emit events when an MCP tool call completes. response.mcp_call_arguments.doner   rm   r  response.mcp_call.completedrJ  r  r  r  r  mcp_callrP  )rJ  r  r   r  r   r   r  )r  r   r|  r  r   r  r  rq   ro   r   r    r.   )rw   r"  r
  r   r$  rx   rx   ry   _emit_mcp_call_done_eventsO  sL   


z1OpenAIServingResponses._emit_mcp_call_done_eventsc              
   C   s   t |jd jdd}td|gd|jg d}g }|td|jd|j|j|jd jd	 |t	d
d|j|j|j|d |t
dd|j|d |S )z7Emit events when a reasoning (analysis) item completes.r   r  r  r  rP  r  r  rm   r  zresponse.reasoning_part.doner  r  r  )r  r  r  r#   rq   r  r%   ro   rn   rX   r    )rw   r"  r
  r  r  r$  rx   rx   ry   _emit_reasoning_done_events|  sR   



z2OpenAIServingResponses._emit_reasoning_done_eventsc                 C   s   t d|jd jg d}g }|tdd|j|j|jd jg |jd |tdd|j|j|j|d |t	d	d|jt
|jd
d|gddd |S )z4Emit events when a final text output item completes.r  r   )rJ  r  r  r  rm   r  r  r  r  r   r  rP  r  r  )r"   r  r  r  r(   ro   rn   rq   r   r    r!   )rw   r"  r
  text_contentr$  rx   rx   ry   _emit_text_output_done_events  sT   


z4OpenAIServingResponses._emit_text_output_done_eventsc                 C   s   |j dur*|j dr| ||S | |j r(|jdur(|jdr(| ||S g S |jdkr5| ||S |jdkr@| ||S g S )zBEmit done events for the previous item when expecting a new start.Nr}  mcp_analysisr  )	r|  r~  r&  r  rq   r+  r  r,  r.  )rw   r"  r
  rx   rx   ry   _emit_previous_item_done_events  s   





z6OpenAIServingResponses._emit_previous_item_done_eventsr  c                 C   s   g }|j s@d|_ dt  |_|tdd|jt|jddg ddd	 | jd
7  _|tdd|j|j|jt	ddg g dd |t
dd|j|j|j|jg d |S )z3Emit events for final channel text delta streaming.Tr  r  rm   r   r  r  r  r  ru   r  r  rp   r  r  r	  r
  )rr   rk   rq   r  r   ro   r!   rn   r   r"   r'   last_content_deltarw   r  r
  r$  rx   rx   ry    _emit_final_channel_delta_events  s\   z7OpenAIServingResponses._emit_final_channel_delta_eventsc                 C   s   g }|j s=d|_ dt  |_|tdd|jtd|jg ddd | jd	7  _|td
d|j|j|jt	dddd |t
d|j|j|j|jdd |S )z;Emit events for analysis channel reasoning delta streaming.Tr  r  rm   r  r  r  r  ru   zresponse.reasoning_part.addedrp   r  r  r  r  )rJ  r  r  r  r  r  )rr   rk   rq   r  r   ro   r#   rn   rW   r  r$   r2  r3  rx   rx   ry   #_emit_analysis_channel_delta_events)  sT   
z:OpenAIServingResponses._emit_analysis_channel_delta_eventsc                 C   s   | j ||}g }|js8d|_dt  |_|tdd|jtd|j|d|ddd	 |t	d
d|j|jd |t
dd|j|j|jd |S )z)Emit events for MCP tool delta streaming.Tr/  r  rm   r*  rp   r  rJ  r  r   r  r   r   r  response.mcp_call.in_progressr)  !response.mcp_call_arguments.deltarJ  r  r  r  r  )r  r   rr   rk   rq   r  r   ro   r.   r   r   r2  )rw   r  r
  r|  r   r$  rx   rx   ry   _emit_mcp_tool_delta_eventsZ  sL   	z2OpenAIServingResponses._emit_mcp_tool_delta_eventsc                 C   s   g }|j s1d|_ dt  |_|tdd|jtd|jddddd	d
 |tdd|j|jd |tdd|j|j|j	d |S )z1Emit events for code interpreter delta streaming.Ttool_r  rm   code_interpreter_callNrD  r  rJ  r  codecontainer_idrr  r   r  z*response.code_interpreter_call.in_progressr)  z)response.code_interpreter_call_code.deltar9  )
rr   rk   rq   r  r   ro   r   r   r   r2  r3  rx   rx   ry   #_emit_code_interpreter_delta_events  sJ   	z:OpenAIServingResponses._emit_code_interpreter_delta_eventsc                 C   s   g }|j s;d|_ dt  |_|jjtdd }|tdd|jt	d|j|d|d	d
d |t
dd|j|jd |tdd|j|j|jd |S )z3Emit events for MCP prefix (mcp.*) delta streaming.Tr/  mcp.Nr  rm   r*  rp   r  r6  r  r7  r)  r8  r9  )rr   rk   rq   r   current_recipientr  r  r   ro   r.   r   r   r2  )rw   r  r
  r$  mcp_namerx   rx   ry   _emit_mcp_prefix_delta_events  sL   		z4OpenAIServingResponses._emit_mcp_prefix_delta_eventsc                 C   s   |j sg S |jjdkr|jjdu r| ||S |jjdkr)|jjdu r)| ||S |jjdks5|jjdkr^|jjdur^|jj}|drJ| ||S | |}|rX| 	|||S | 
||S |jjdksj|jjdkr}|jjdur}|jjdr}| ||S g S )z>Emit events for content delta streaming based on channel type.r  Nr0  
commentaryr}  rA  )r2  r   current_channelrB  r4  r5  r~   _emit_function_call_delta_eventsr  r:  r@  rD  )rw   r  r
  r|  is_mcp_toolrx   rx   ry   _emit_content_delta_events  s2   

z1OpenAIServingResponses._emit_content_delta_eventsc                 C   sV  |j tdd }t|jd j}d}|dkr"tjd|d d}n0|dkr5tjdd	|	d
d d}n|dkrKtj
d|d d	|	d
d d}ntd| dt  |_g }|tdd|jtjd|j|ddd |tdd|j|jd |tdd|j|jd |tdd|j|jd |tdd|jtd|j|ddd |S )z0Emit events for browser tool calls (web search).browser.Nr   searchquery)rJ  rL  open	open_pagezcursor:cursorrp   )rJ  urlfindpattern)rJ  rR  rP  zUnknown function name: r;  r  rm   web_search_callr  )rJ  r  actionr   r  z$response.web_search_call.in_progressr)  z"response.web_search_call.searchingz"response.web_search_call.completedr  rP  )r|  r  jsonloadsr  r  r,   ActionSearchActionOpenPager   
ActionFindr  rk   rq   r  r   ro   r   r*   r+   r)   r    )rw   r"  r
  r#  parsed_argsrT  r$  rx   rx   ry   _emit_browser_tool_events  s   	z0OpenAIServingResponses._emit_browser_tool_eventsc                 C   s   |j }| j||}g }|tdd|j|j|jd j|d |t	dd|j|jd |t
dd|jtd|j||jd j|d	d
d |S )zDEmit events when an MCP tool completes during assistant action turn.r'  rm   r   rJ  r  r  r  r  r   r(  r)  r  r*  rP  r6  r  )r|  r  r   r  r   ro   rq   r  r  r   r    r.   )rw   r"  r
  r|  r   r$  rx   rx   ry    _emit_mcp_tool_completion_eventsi  sJ   


z7OpenAIServingResponses._emit_mcp_tool_completion_eventsc                 C   s   g }| tdd|j|j|jd jd | tdd|j|jd | tdd|j|jd | tdd|jt	d	|j|jd jd
g ddd |S )z,Emit events when code interpreter completes.z(response.code_interpreter_call_code.donerm   r   )rJ  r  r  r  r>  z+response.code_interpreter_call.interpretingr)  z(response.code_interpreter_call.completedr  r<  rD  rP  r=  r  )
r  r   ro   rq   r  r  r   r   r    r   )rw   r"  r
  r$  rx   rx   ry   (_emit_code_interpreter_completion_events  sT   
	
z?OpenAIServingResponses._emit_code_interpreter_completion_eventsc                 C   s   |j tdd }g }|tdd|j|j|jd j|d |tdd|j|jd |t	d	d|jt
d
|j||jd j|ddd |S )z6Emit events when an MCP prefix tool (mcp.*) completes.rA  Nr'  rm   r   r\  r(  r)  r  r*  rP  r6  r  )r|  r  r  r   ro   rq   r  r  r   r    r.   )rw   r"  r
  rC  r$  rx   rx   ry   "_emit_mcp_prefix_completion_events  sH   


z9OpenAIServingResponses._emit_mcp_prefix_completion_eventsc                 C   s   |  rt|jjdkrg S g }|jjd }| jdur5| jdr5|jdur5|jdr5|| 	|| | jdur{|jdur{|j
dur{|jr{|j}|drZ|| || |S | |od|j
d}|rr|| || |S || || |S )z!Emit events for tool action turn.r   rm   Nr   rJ  rA  r/  )is_assistant_action_turnr  r   r,  r   r  r|  r~  r   r[  rq   rr   r_  r  r]  r^  )rw   r  r
  r$  r"  r|  rH  rx   rx   ry   _emit_tool_action_events  sL   











z/OpenAIServingResponses._emit_tool_action_eventsc              	   C   s   |j jdkr|j jr|j jdsg S g }|jdu rId|_|j jtdd }dt  |_t|d|jdt  d	d
d}|	t
dd|j|d |	t|j|j|jddd |S )z?Emit events for developer function calls on commentary channel.rE  r}  FTNr   r  call_rp   r  )r   rJ  r  r!  r  r   r  rm   r  z&response.function_call_arguments.delta)r  r  r  r  rJ  )r   rF  rB  r~  rs   r  rk   rq   r   r  r   ro   r   r2  )rw   r  r
  r$  fc_nametool_call_itemrx   rx   ry   rG  %	  sJ   

		z7OpenAIServingResponses._emit_function_call_delta_eventsc
                 C  s   t  }
|2 zU3 d H W }t|tsJ | |j|j | r?t|jj	dkr;|jj	d }| 
||
D ]}|	|V  q3|
  | ||
D ]}|	|V  qE| ||
D ]}|	|V  qSq6 d S )Nr   rm   )rl   r   rP   ro  rn  r   is_expecting_startr  r   r,  r1  rz   rI  ra  )rw   r   r   r5  r   r   r   r  r   r  r
  r  r"  r  rx   rx   ry   !_process_harmony_streaming_eventsS	  s&   z8OpenAIServingResponses._process_harmony_streaming_eventsc	                   s  |pt t }d dtdtf fdd}	t 4 I d H }
| jr/| |||
I d H  | j}n| j}tj	||||g dd d
 }|	tdd	|d
V  |	tdd	|d
V  z||||||||||		2 z	3 d H W }|V  qa6 W n+ ty } z| |}|	tt|V  W Y d }~W d   I d H  d S d }~ww dd }| j||| |||||dI d H }|	tdd	|d
V  W d   I d H  d S 1 I d H sw   Y  d S )Nr   r  rt   c                    s   t | dr | _ d7  | S )Nr  ru   )r   r  )r  r  rx   ry   r  	  s   
z`OpenAIServingResponses.responses_stream_generator.<locals>._increment_sequence_number_and_returnr  r   zresponse.createdrm   )rJ  r  r6  zresponse.in_progressc                   S  s   d S r   rx   rx   rx   rx   ry   empty_async_generator	  s   zPOpenAIServingResponses.responses_stream_generator.<locals>.empty_async_generator)r   r  )r   r  r\   r   r   rO  rf  r  rZ   r  
model_dumprT   rU   r?   /_convert_generation_error_to_streaming_responser5   validate_jsonr%  rS   )rw   r   r   r5  r   r   r   r  r   r  rH  	processerinitial_response
event_datar.  
error_jsonrh  final_responserx   rg  ry   r$  z	  s   
	

/
.z1OpenAIServingResponses.responses_stream_generatorr   )Xr{   r|   r}   r7   rA   r:   r   r9   r   r;   r   rg   r=   r   rY   r   r   r   r\   rZ   r8  r  r  rL   r   rO  rh   r   rj   r>   r   r%  r  r  r   r   r   SampleLogprobr   r0   r  r   rd   r/   r  r-   r  re   r   rs  rM   rj  r  r  r  r  rG  r   r!  r#  r  r  r   r  r   r  rl   r&  r+  r,  r.  r1  rP   r4  r5  r:  r@  rD  rI  r[  r]  r^  r_  ra  rG  rf  r$  __classcell__rx   rx   r   ry   r      s  
 		
g

$

  


	

 	


*

:

3
E
 






	


  3
&
-
0
1

6
1
.
,
/
/
W
,
1
+
4
.
	


0
	

r   )r   rU  r  r  collectionsr   collections.abcr   r   r   r   
contextlibr   r   dataclassesr	   r
   httpr   typingr   r  fastapir   openai.types.responsesr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   +openai.types.responses.response_output_itemr.   +openai.types.responses.response_output_textr/   r0   .openai.types.responses.response_reasoning_itemr1   r  openai.types.responses.toolr2   r3   openai_harmonyr4   r  pydanticr5   vllmr6   vllm.engine.protocolr7   vllm.entrypoints.chat_utilsr8   r9   vllm.entrypoints.loggerr:    vllm.entrypoints.mcp.tool_serverr;   'vllm.entrypoints.openai.engine.protocolr<   r=   r>   &vllm.entrypoints.openai.engine.servingr?   r@   &vllm.entrypoints.openai.models.servingrA   ,vllm.entrypoints.openai.parser.harmony_utilsrB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   )vllm.entrypoints.openai.responses.contextrL   rM   rN   rO   rP   *vllm.entrypoints.openai.responses.protocolrQ   rR   rS   rT   rU   rV   rW   rX   rY   rZ   r[   r\   'vllm.entrypoints.openai.responses.utilsr]   r^   r_   vllm.entrypoints.utilsr`   vllm.exceptionsra   vllm.inputs.datarb   vllm.loggerrc   vllm.logprobsrq  rd   vllm.outputsre   vllm.parserrf   vllm.renderers.inputsrg   vllm.sampling_paramsrh   ri   vllm.tokenizersrj   
vllm.utilsrk   r{   r   rl   r   r   r   r   r   rx   rx   rx   ry   <module>   sj   "08
'