o
    پi                     @  s  d Z ddlmZ ddlZddlZddlZddlZddlZddlm	Z	 ddl
mZ ddlmZmZmZmZmZmZ ddlZddlm  mZ ddlZddlmZ ddlmZ dd	lmZmZmZ dd
l m!Z! ddl"m#Z$ ddl%m&Z' ddl(m)Z)m*Z*m+Z+m,Z, ddl-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5 ddl6m7Z7m8Z8m9Z9m:Z:m;Z;m<Z<m=Z= ddl>m?Z? ddl@mAZAmBZB ddlCmDZD ddlEmFZF ddlGmHZH erddlImJZJ ddlKmLZL eMeNZOG dd de?ZPdS )"Handler for /v1/responses requests    )annotationsN)AsyncExitStack)
HTTPStatus)TYPE_CHECKINGAnyAsyncGeneratorAsyncIteratorOptionalUnion)RequestORJSONResponse)ResponseOutputMessageResponseOutputTextResponseReasoningItem)ResponseFunctionToolCall)Content)Message)ConversationContextHarmonyContextSimpleContextStreamingHarmonyContext)get_developer_message%get_stop_tokens_for_assistant_actionsget_system_messageget_user_messageparse_output_messageparse_remaining_stateparse_response_inputrender_for_completion)ChatCompletionMessageParamChatCompletionRequestPromptTokenUsageInfoRequestResponseMetadataResponsesRequestResponsesResponse	UsageInfo)OpenAIServingChat)MCPToolServer
ToolServer)GenerateReqInput)ReasoningParser)random_uuid)TemplateManager)TokenizerManagerc                      s  e Zd ZdZdddddc fddZ			dddeddZ		dfdgdd Zdhd!d"Z	didjd(d)Zdkd.d/Z	dld0d1Z
	didmd=d>Zdnd@dAZdodCdDZ	didpdFdGZdqdIdJZ	didrdKdLZdsdNdOZdsdPdQZdtdRdSZdtdTdUZ	didudYdZZ		dvdwdadbZ  ZS )xOpenAIServingResponsesr   FN)enable_prompt_tokens_detailsenable_force_include_usagetool_servertokenizer_managerr/   template_managerr.   r1   boolr2   r3   Optional[ToolServer]returnNonec                  s   t  || | jjj| _|| _|| _i | _|r|dnd| _	|r'|dnd| _
|| _| jjjjdk| _| jrLd| jvrCg | jd< | jd t  i | _t | _i | _i | _d S )NbrowserFpythongpt_ossstop_token_ids)super__init__r4   server_argsreasoning_parserr1   r2   default_sampling_paramshas_toolsupports_browsingsupports_code_interpreterr3   model_config	hf_config
model_typeuse_harmonyextendr   response_storeasyncioLockresponse_store_lock	msg_storebackground_tasks)selfr4   r5   r1   r2   r3   	__class__ c/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/entrypoints/openai/serving_responses.pyr?   I   s.   	




zOpenAIServingResponses.__init__invalid_request_error  messagestrerr_typestatus_codeintparamOptional[str]r   c                 C  s   ||||d}t d|i|dS )NrX   typer]   codeerror)contentr[   r   )rQ   rX   rZ   r[   r]   nested_errorrT   rT   rU   create_error_response   s   z,OpenAIServingResponses.create_error_responseBadRequestErrorc                 C  s   t d||d |diS )Nrb   r_   )jsondumps)rQ   rX   rZ   r[   rT   rT   rU   create_streaming_error_response   s   z6OpenAIServingResponses.create_streaming_error_responsec                 C  s   dS Nresp_rT   )rQ   rT   rT   rU   _request_id_prefix   s   z)OpenAIServingResponses._request_id_prefixrequestr%   raw_requestOptional[Request]CUnion[AsyncGenerator[str, None], ResponsesResponse, ORJSONResponse]c                   s8  j s	dS |j}|d urG|ds|S j4 I d H  j|}W d   I d H  n1 I d H s8w   Y  |d u rF|S nd }z#|j	}j j
}jr^||\}}}	n|||I d H \}}}	W n' ttttjfy }
 ztd |
 d|
j W  Y d }
~
S d }
~
ww t|jd}|r||j_jd urtjtr|js|jr|jrt dd |jD rÈdS g }g }jrڈj!r|"d	 j#r|"d
 t$ 4 I d H  zjd ur fdd|D }i }|D ]}|| I d H ||< qnt%|dksJ i }t&|	D ]v\}}t'|dr#t%|}nt|t(r.t%|}nd}t'j j)dr=j j)j*nd}t+|| d}|,|j-}jra|jr[t.||}n	t/||}nt0 }t1|||j|j2||jd}j3|j|| |||||j4d}|"| qW n$ ty }
 zt5|
W  Y d }
~
W  d   I d H  S d }
~
ww t%|dksJ |\}|j6r|j7|j< |jr2t8t99 }t:j;||||g dd dj4 I d H  jj<< W d   I d H  n1 I d H sw   Y  t=j>?||||||||dj< d}|j@j<< |Afdd W  d   I d H  S |jrKB|||||||W  d   I d H  S zC|||||||I d H }|W W  d   I d H  S  tDy }
 zt5|
W  Y d }
~
W  d   I d H  S d }
~
ww 1 I d H sw   Y  dS )NzModel not loadedrk   z$Error in preprocessing prompt inputs )
request_idc                 s  s    | ]}|j d v V  qdS ))web_search_previewcode_interpreterNr`   .0toolrT   rT   rU   	<genexpr>   s
    
z:OpenAIServingResponses.create_responses.<locals>.<genexpr>zFMCP tool server is not supported in background mode and streaming moder:   r;   c                   s    i | ]}|  j|qS rT   )enter_async_contextr3   get_tool_session)rw   	tool_name)
exit_stackrQ   rT   rU   
<dictcomp>   s    
z;OpenAIServingResponses.create_responses.<locals>.<dictcomp>r   __len__context_len   i   )	input_idssampling_paramsstreamrid	extra_key
background)rn   priority   queued
model_namecreated_timeoutputstatususagecreate_)namec                   s   j  jd S N)rP   popid)_)responserQ   rT   rU   <lambda>S  s    z9OpenAIServingResponses.create_responses.<locals>.<lambda>zUnknown error)Er4   re   previous_response_id
startswith_make_invalid_id_errorrN   rK   get_make_not_found_errormodel	tokenizerrI   _make_request_with_harmony_make_request
ValueError	TypeErrorRuntimeErrorjinja2TemplateErrorlogger	exception	__cause__r$   rr   staterequest_metadatar3   
isinstancer)   r   r   toolsanyrD   appendrE   r   len	enumeratehasattrlistrF   r   maxto_sampling_paramsrB   r   r   r   r+   _compute_extra_key_generate_with_builtin_toolsr   rY   storerO   r\   timer&   from_requestr   rL   create_task_run_background_requestrP   add_done_callbackresponses_stream_generatorresponses_full_generator	Exception)rQ   rm   rn   prev_response_idprev_responser   r   messagesrequest_promptsengine_promptser   
generators	tool_listtool_session_ctxstool_sessionsr|   iengine_promptprompt_lengthr   default_max_tokensr   contextadapted_request	generatorresult_generatorr   taskresultrT   )r}   r   rQ   rU   create_responses   st  


(



"

	



		2A	*

oy      
	z'OpenAIServingResponses.create_responsesr   Optional[ResponsesResponse]r   r   c              
     s   |  ||}z)t|j||jd}| jjj}| ||}|r'|jg}|jg}	n|j	g}|j	g}	W nB t
yr }
 z6td|
  d}|D ]}|dd}|dd}|| d| d7 }qD||}|g}|g}	W Y d }
~
nd }
~
ww |||	fS )	N)r   r   r   z(Chat processing failed, using fallback:  roleuserrc   z: 
)_construct_input_messagesr"   r   r   r4   rF   is_multimodal_process_messagesprompt
prompt_idsr   r   warningr   encode)rQ   rm   r   r   r   chat_requestr   processed_messagesr   r   r   prompt_textmsgr   rc   r   rT   rT   rU   r   r  s:   



z$OpenAIServingResponses._make_requestc                 C  s8   |j dkr	td| ||}t|}|}||g|gfS )Nautoz4Only 'auto' tool_choice is supported in response API)tool_choiceNotImplementedError&_construct_input_messages_with_harmonyr    )rQ   rm   r   r   prompt_token_idsr   rT   rT   rU   r     s   
z1OpenAIServingResponses._make_request_with_harmonyr   r   AsyncIterator[Any]r   r   r   r   r$   r   Optional[int](Union[ResponsesResponse, ORJSONResponse]c	              
     sP  |d u rt t }z|2 z3 d H W }	q6 W n' tjy&   | d Y S  ty> }
 z| t|
W  Y d }
~
S d }
~
ww | jr[t|t	sIJ | 
|}|j}|j}|j}|j}nlt|tsbJ |j}|d uskJ | ||d |}t|dr|jdd}|jdd}|jdd}n8t|drt|d	r|jrt|jnd}|jr|jd jrt|jd jnd}t|d
d}d}nd}d}d}d}t|||| |d}| jr|rt|d|_||_tj |||||d|d}|j!r&| j"4 I d H $ | j#|j$}|d u s	|j%dkr|| j#|j$< W d   I d H  |S 1 I d H s!w   Y  |S )NzClient disconnectedtext	meta_infoprompt_tokensr   completion_tokenscached_tokensr   outputsnum_cached_tokens)r   r   total_tokensreasoning_tokens)r   	completedr   	cancelled)&r\   r   rL   CancelledErrorre   r   rY   rI   r   r   (_make_response_output_items_with_harmonynum_prompt_tokensnum_output_tokensr   num_reasoning_tokensr   last_output_make_response_output_itemsr   r   r   r   r   r   	token_idsgetattrr'   r1   r#   prompt_tokens_detailsfinal_usage_infor&   r   r   rN   rK   r   r   )rQ   rm   r   r   r   r   r   r   r   r   r   r   r   num_generated_tokensr   r   	final_resr   r   stored_responserT   rT   rU   r     s   





z/OpenAIServingResponses.responses_full_generatorfinal_outputc                 C  s   | j rt| j d|d}||\}}nd }|}g }|r2tdt  dg td|dgd d}|| |rOt|g dd d	}	td
t  |	gdddd}
||
 |S )NF)rH   stream_reasoningrm   rs_	reasoningreasoning_text)r`   r   r   r`   summaryrc   r   output_text)r   r   r`   logprobsmsg_	assistantr   rX   )r   rc   r   r   r`   )	rA   r,   parse_non_streamr   r-   ResponseReasoningTextContentr   r   r   )rQ   rm   r  r   rA   reasoning_contentrc   output_itemsreasoning_itemr  rX   rT   rT   rU   r     sL   



z2OpenAIServingResponses._make_response_output_itemsr   c                 C  sH   g }|j }|j|d  D ]	}|t| qt|j}|r"|| |S r   )num_init_messagesr   rJ   r   r   parser)rQ   r   r  r  r   
last_itemsrT   rT   rU   r   >  s   

z?OpenAIServingResponses._make_response_output_items_with_harmony list[ChatCompletionMessageParam]c                 C  s   g }|j r|d|j d |d ur8| j|j }|| |jD ]}t|tr(q |jD ]}|d|j d q+q t|j	t
rI|d|j	d |S ||j	 |S )Nsystem)r   rc   r   )instructionsr   rO   r   rJ   r   r   r   rc   inputrY   )rQ   rm   r   r   prev_msgoutput_itemrc   rT   rT   rU   r   L  s2   




z0OpenAIServingResponses._construct_input_messageslist['OpenAIMessage']c                 C  s  g }|d u rX|j r|j jnd }dd |jD }d|v o| jd u}d|v o(| jd u}t|| jr6|r6| jdnd | jrB|rB| jdnd d}|| t|j|j}	||	 nc| j	|j
 }
t|
dkrt|
d	 d
r|
d	 jdkrd	}tt|
d d	d	D ]}t|
| d
r|
| jdkr|} nq~|
|d d  }|
|d d = |D ]}t|d
r|jdkr|
| q||
 t|jtr|t|j |S |d urt|j}ng }|jD ]}|t|| t|tr|| q|S )Nc                 S  s   g | ]}|j qS rT   ru   rv   rT   rT   rU   
<listcomp>~  s    zQOpenAIServingResponses._construct_input_messages_with_harmony.<locals>.<listcomp>rs   rt   r:   r;   )reasoning_effortbrowser_descriptionpython_descriptionr   channelfinal   r   analysis)r
  effortr   r3   r   get_tool_descriptionr   r   r  rO   r   r   r   r&  rangerJ   r   r  rY   r   copyr   r   r   )rQ   rm   r   r   r"  
tool_typesenable_browserenable_code_interpretersys_msgdev_msg	prev_msgsprev_final_msg_idxr   recent_turn_msgsr   prev_outputsresponse_msgrT   rT   rU   r   u  st   







z=OpenAIServingResponses._construct_input_messages_with_harmonyc	              
     sJ  zE| j 4 I d H  | j|j}|d usJ d|_W d   I d H  n1 I d H s*w   Y  | j||||||||g|	R i |
I d H }W n  tyf } ztd|j | 	t
|}W Y d }~nd }~ww t|tr|j}| j 4 I d H   | j|}|d usJ |jdvrd|_W d   I d H  d S 1 I d H sw   Y  d S d S )Nin_progressz Background request failed for %s)r   r   failed)rN   rK   r   rr   r   r   r   r   r   re   rY   r   r   )rQ   rm   r   r   r   r   r   r   r   argskwargsr  r   r   response_idrT   rT   rU   r     sF   (	


.z.OpenAIServingResponses._run_background_requestr<  c              	     sr   | ds| |S | j4 I d H  | j|}W d   I d H  n1 I d H s)w   Y  |d u r7| |S |S rj   )r   r   rN   rK   r   r   )rQ   r<  r   rT   rT   rU   retrieve_responses  s   

(
z)OpenAIServingResponses.retrieve_responsesc              	     s  | ds| |S | j4 I d H ? | j|}|d u r,| |W  d   I d H  S |j}|dvrD| jdddW  d   I d H  S d|_W d   I d H  n1 I d H sWw   Y  | jj	|d | j
| }r|  z|I d H  W |S  tjy   td| Y |S w |S )	Nrk   )r   r8  rV   z%Cannot cancel a synchronous response.)rZ   rX   r   )r   z$Background task for %s was cancelled)r   r   rN   rK   r   r   r   re   r4   abort_requestrP   cancelrL   r   r   r   )rQ   r<  r   prev_statusr   rT   rT   rU   cancel_responses  s8   

(z'OpenAIServingResponses.cancel_responsesc                 C  s   | j d| ddddS )NzInvalid 'response_id': 'z*'. Expected an ID that begins with 'resp'.rV   r<  )rX   rZ   r]   )re   rQ   r<  rT   rT   rU   r     s
   
z-OpenAIServingResponses._make_invalid_id_errorc                 C  s   | j d| ddtjddS )NzResponse with id 'z' not found.rV   r<  )rX   rZ   r[   r]   )re   r   	NOT_FOUNDrB  rT   rT   rU   r   &  s   
z,OpenAIServingResponses._make_not_found_error&AsyncIterator[StreamingHarmonyContext]r   AsyncGenerator[str, None]c	                  s:  |pt t }d  fdd}	d}
d}dt  }d}tj||||g dd d }|	tjdd	|d
V  |	tjdd	|d
V  |2 z3 d H W }t	|dsPqC|
 r|d7 }d}t|jjdkr|jjd	 }|jd urnn|jdkrtdt  dg t|jd jddgdd}|	tjd|d	||
|jd jdV  |	tjdd	||dV  nG|jdkrtjd|jd jg d}|	tjdd	||
|jd jg |dV  |	tjdd	|||
|dV  |	tjdd	|tj|d d!|gdd"dV  |jjr|jjdkrB|jjd u rB|s0d#}|	tjd$d	|tj|d d!g dd"dV  |	tjd%d	|||
tjdd&g d d'd(V  |	tjd)d	|
|||jjg d*V  nK|jjdkr|jjd u r|s}d#}|	tjd$d	|tjd|g dd+dV  |	tjd%d	|||
tjdd&g d d'd(V  |	tj d,|||
|jjd	d-V  |! rt|jjdkr|jjd	 }| j"rU|jd urU|j#d.rU|jtd.d  }d }t$%|jd j}|d/krtj&j'd/|d0 d1}n4|d2krtj&j(d3d4|)d5d& d6}n|d7krtj&j*d7|d8 d4|)d5d& d9}nt+d:| |	tjd$d	|tj&j,d;||dd<dV  |	tj-d=d	||d>V  |	tj.d?d	||d>V  |	tj/d@d	||d>V  |	tjdd	|tj,d;||dd<dV  | j0r|jd ur|j#dAr|	tjd$d	|tj1dB|d&dCg ddDdV  |	tj2dEd	||d>V  |	tj3dFd	|||jd jdGV  |	tj4dHd	||d>V  |	tj5dId	||d>V  |	tjdd	|tj1dB||jd jdCg ddDdV  qC6 dJdK }| j6||| |||||dLI d H }| }|)dMr|dM }|)dNddO|)dOdi|)dPddQ|)dQdi|)dRddS|dM< |	tj7dTd	|d
V  d S )UNr   c                   s>   t | dr | _ d7  t| dd}d| d| jd d dS )	Nsequence_numberr   r`   unknownzevent: z
data: )indentz

)r   rF  r  model_dump_json)event
event_typerF  rT   rU   _send_event@  s   

zFOpenAIServingResponses.responses_stream_generator.<locals>._send_eventitem_Fr8  r   zresponse.createdr%  )r`   rF  r   zresponse.in_progressis_expecting_startr   r)  r	  r
  r  )r   r`   r   r  zresponse.reasoning_text.done)r`   item_idrF  output_indexcontent_indexr   zresponse.output_item.done)r`   rF  rQ  itemr'  r  )r`   r   r   zresponse.output_text.done)r`   rF  rQ  rR  r   r  rP  zresponse.content_part.done)r`   rF  rP  rQ  rR  partrX   r  )r   r`   r   rc   r   Tzresponse.output_item.addedzresponse.content_part.addedr   )r`   r   r   r  )r`   rF  rQ  rP  rR  rT  zresponse.output_text.delta)r`   rF  rR  rQ  rP  deltar  )r`   r   r  r   zresponse.reasoning_text.delta)r`   rP  rQ  rR  rU  rF  zbrowser.searchquery)r`   rW  open	open_pagezcursor:cursor)r`   urlfindpattern)r`   r]  r[  zUnknown function name: web_search_call)r`   r   actionr   z$response.web_search_call.in_progress)r`   rF  rQ  rP  z"response.web_search_call.searchingz"response.web_search_call.completedr;   code_interpreter_callr   )r`   r   ra   container_idr   r   z*response.code_interpreter_call.in_progressz(response.code_interpreter_call_code.done)r`   rF  rQ  rP  ra   z+response.code_interpreter_call.interpretingz(response.code_interpreter_call.completedc                   S s   d S r   rT   rT   rT   rT   rU   empty_async_generator  s   zPOpenAIServingResponses.responses_stream_generator.<locals>.empty_async_generator)r   r   r   r   r   r   r   )input_tokensinput_tokens_detailsoutput_tokensoutput_tokens_detailsr   zresponse.completed)8r\   r   r-   r&   r   
model_dumpopenai_responses_typesResponseCreatedEventResponseInProgressEventr   rO  r   r  r   	recipientr&  r   r  rc   r   ResponseReasoningTextDoneEventResponseOutputItemDoneEventr   ResponseTextDoneEventResponseContentPartDoneEventr   last_content_deltacurrent_channelcurrent_recipientResponseOutputItemAddedEventResponseContentPartAddedEventResponseTextDeltaEventResponseReasoningTextDeltaEventis_assistant_action_turnrD   r   orjsonloadsresponse_function_web_searchActionSearchActionOpenPager   
ActionFindr   ResponseFunctionWebSearch$ResponseWebSearchCallInProgressEvent#ResponseWebSearchCallSearchingEvent#ResponseWebSearchCallCompletedEventrE   $ResponseCodeInterpreterToolCallParam*ResponseCodeInterpreterCallInProgressEvent(ResponseCodeInterpreterCallCodeDoneEvent,ResponseCodeInterpreterCallInterpretingEvent)ResponseCodeInterpreterCallCompletedEventr   ResponseCompletedEvent)rQ   rm   r   r   r   r   r   r   r   rM  current_content_indexcurrent_output_indexcurrent_item_idsent_output_item_addedinitial_responsectxprevious_itemr  text_contentfunction_namer_  parsed_argsrb  final_responseresponse_dict
usage_inforT   rL  rU   r   .  s  	

















	
	
    Y




z1OpenAIServingResponses.responses_stream_generatorrr   request_promptr   r+   r   AsyncGenerator[Any, None]c                 K s   |pd}		 | j ||}
|
2 z3 dH W }|| |V  q6 | s%dS | I dH }|| | }t|||j||j|j	|j
|j|j|j|jd}t|dsVt|tryt| j jdd}|t| d }t|trst|d|d< nt|d|_|	d }q)	z<Generate with builtin tool support for harmony-based models.r   TN)r   r   r   r   r   return_logproblogprob_start_lentop_logprobs_numreturn_text_in_logprobsreturn_hidden_statesr   max_new_tokensr   r   r   )r4   generate_requestappend_outputneed_builtin_tool_call	call_toolr    r+   r   r   r  r  r  r  r  r   r   r   dictr  rF   r   r   r  )rQ   rr   r  r   r   r   rn   r   r;  orig_priorityr   restool_outputr   r   remaining_tokensrT   rT   rU   r     sP   



z3OpenAIServingResponses._generate_with_builtin_tools)r4   r/   r5   r.   r1   r6   r2   r6   r3   r7   r8   r9   )rV   rW   N)
rX   rY   rZ   rY   r[   r\   r]   r^   r8   r   )rf   rW   )rX   rY   rZ   rY   r[   r\   r8   rY   )r8   rY   r   )rm   r%   rn   ro   r8   rp   )rm   r%   r   r   r   r   )rm   r%   r   r   )rm   r%   r   r   r   r   r   r   r   rY   r   r   r   r$   r   r   r8   r   )rm   r%   r  r   r   r   )r   r   )rm   r%   r   r   r8   r  )rm   r%   r   r   r8   r   )rm   r%   r   r   r   r   r   r   r   rY   r   r   r   r$   r   r   )r<  rY   r8   r   )r<  rY   )rm   r%   r   r   r   rD  r   r   r   rY   r   r   r   r$   r   r   r8   rE  )NN)rr   rY   r  r   r   r+   r   r   r   r   rn   ro   r   r   r8   r  )__name__
__module____qualname____doc__r?   re   ri   rl   r   r   r   r   r   r   r   r   r   r=  rA  r   r   r   r   __classcell__rT   rT   rR   rU   r0   F   sP    ;
 
O
,
`
2
)T
-

!

   Er0   )Qr  
__future__r   rL   r-  rg   loggingr   
contextlibr   httpr   typingr   r   r   r	   r
   r   r   openai.types.responsestypes	responsesrh  rx  fastapir   fastapi.responsesr   r   r   r   2openai.types.responses.response_function_tool_callr   .openai.types.responses.response_reasoning_itemr   r  openai_harmonyr   OpenAIMessagesglang.srt.entrypoints.contextr   r   r   r   $sglang.srt.entrypoints.harmony_utilsr   r   r   r   r   r   r   r    &sglang.srt.entrypoints.openai.protocolr!   r"   r#   r$   r%   r&   r'   *sglang.srt.entrypoints.openai.serving_chatr(   )sglang.srt.entrypoints.openai.tool_serverr)   r*   sglang.srt.managers.io_structr+   "sglang.srt.parser.reasoning_parserr,   sglang.srt.utilsr-   $sglang.srt.managers.template_managerr.   %sglang.srt.managers.tokenizer_managerr/   	getLoggerr  r   r0   rT   rT   rT   rU   <module>   s@    ($
	
