o
    پi                     @  s  d dl mZ d dlZd dlZd dlZd dlZd dlZd dlmZm	Z	m
Z
mZmZmZmZ d dlZd dlZd dlmZ d dlmZmZ d dlmZmZ d dlmZ d dlmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z- d d	l.m/Z/ d d
l0m1Z1 d dl2m3Z3m4Z4m5Z5m6Z6 d dl7m8Z8 d dl9m:Z: d dl;m<Z< d dl=m>Z> d dl?m@Z@ d dlAmBZB d dlCmDZD d dlEmFZF erd dlGmHZH d dlImJZJ eKeLZMdddZNG dd de/ZOdS )    )annotationsN)TYPE_CHECKINGAnyAsyncGeneratorDictListOptionalUnion)Request)ORJSONResponseStreamingResponse)Draft202012ValidatorSchemaError)encode_messages)ChatCompletionRequestChatCompletionResponseChatCompletionResponseChoice"ChatCompletionResponseStreamChoiceChatCompletionStreamResponseChatCompletionTokenLogprobChatMessageChoiceLogprobsDeltaMessageErrorResponseFunctionResponseLogProbsMessageProcessingResultSglExtToolCallToolCallProcessingResult
ToolChoice
TopLogprob)OpenAIServingBase)UsageProcessor)&process_cached_tokens_details_from_retprocess_hidden_states_from_retprocess_routed_experts_from_retto_openai_style_logprobs)ToolCallItem)FunctionCallParser)JsonArrayParser)get_json_schema_constraint)GenerateReqInput)generate_chat_conv)#process_content_for_template_format)ReasoningParser)TemplateManager)TokenizerManagerrequestr   c                 C  s   g }g }| j pg D ]Z}t|dd }t|tsq	|D ]I}t|dd dkr?t|dd }|r1t|dd nd }|d ur>|t| qt|dd dkrbt|dd }|rUt|dd nd }|d urb|t| qq	|rjt|nd }	|rrt|nd }
|	|
fS )Ncontenttype	image_urlmax_dynamic_patch	video_url)messagesgetattr
isinstancelistappendintmin)r2   img_valsvid_valsmsgr3   partiumdpvuimg_max_dynamic_patchvid_max_dynamic_patch rH   ^/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/entrypoints/openai/serving_chat.py_extract_max_dynamic_patch=   s.   
rJ   c                      s  e Zd ZdZdZdr fddZdsddZdtddZduddZdvddZ	dwddZ
	dxdyd#d$Zdzd'd(Zd{d+d,Zdzd-d.Zd|d2d3Zd}d5d6Zd~d8d9Zdd>d?Z	dddDdEZddIdJZddNdOZ		PdddWdXZdd[d\ZddbdcZddddeZddfdgZddldmZddpdqZ  ZS )OpenAIServingChatz)Handler for /v1/chat/completions requestsFtokenizer_managerr1   template_managerr0   c                   s   t  | || _| jjj| _| jjj| _| jj | _	| j	r.t
js.td| j	  dt
_t| jjdoDt| jjjdoD| jjjjdk| _|  | _d S )NzAUsing default chat sampling params from model generation config: T	hf_config
model_typegpt_oss)super__init__rM   rL   server_argstool_call_parserreasoning_parsermodel_configget_default_sampling_paramsdefault_sampling_paramsrK   _default_sampling_params_loggedloggerinfohasattrrN   rO   
is_gpt_oss_use_dpsk_v32_encodinguse_dpsk_v32_encoding)selfrL   rM   	__class__rH   rI   rR   \   s(   

zOpenAIServingChat.__init__r8   List[Dict[str, Any]]r2   r   return*tuple[List[Dict[str, Any]], Optional[str]]c                 C  sf   d}|r/|d  ddkr/|d  d}t|tr/|jr(|}|dd }||fS d|d|d< ||fS )a  
        Handle continue_final_message feature: separate final assistant message.

        If continue_final_message is enabled and the last message is from assistant,
        extract its content and remove it from the message list.
        If continue_final_message is False and the last message is from assistant,
        convert it to a user message to ensure the last message is always from user.

        Only processes text-based content (strings), ignoring multimodal content (lists).

        Args:
            messages: List of message dictionaries
            request: ChatCompletionRequest with continue_final_message flag

        Returns:
            Tuple of (processed_messages, assistant_prefix)
            - processed_messages: Messages with last assistant message handled appropriately
            - assistant_prefix: Content of the last assistant message (string only), or None
        Nrole	assistantr3   userrg   r3   )getr:   strcontinue_final_message)r`   r8   r2   assistant_prefixlast_contentrH   rH   rI   _handle_last_assistant_message|   s   
z0OpenAIServingChat._handle_last_assistant_message
prompt_ids	List[int]rn   rl   c                 C  s8   | j j|}|r|d | j jjkr|dd }|| S )a
  
        Append assistant prefix to prompt_ids.

        Args:
            prompt_ids: Current prompt token IDs
            assistant_prefix: Assistant message content to append

        Returns:
            Updated prompt_ids with assistant prefix appended
        r      N)rL   	tokenizerencodebos_token_id)r`   rq   rn   encodedrH   rH   rI   &_append_assistant_prefix_to_prompt_ids   s   z8OpenAIServingChat._append_assistant_prefix_to_prompt_idsboolc                 C  sD   | j jd uo| j jjd u}| j jjj}|rd|d v nd}| o!|S )N
DeepseekV3r   F)rL   rt   chat_templaterV   rN   architectures)r`   has_chat_templater|   is_dpsk_v32rH   rH   rI   r^      s   
z(OpenAIServingChat._use_dpsk_v32_encodingc                 C  s   dS )Nz	chatcmpl-rH   )r`   rH   rH   rI   _request_id_prefix   s   z$OpenAIServingChat._request_id_prefixOptional[str]c           	        sh  |j sdS t|jtr|j dkr|jsdS |jdur@t|jts@|js'dS |jjj t fdd|jD }|s@d  d	S t	|jpEg D ]2\}}|jj
du rRqGz	t|jj
 W qG tyy } zd
| dt| W  Y d}~  S d}~ww |jp|j}| jjj}|r|r||kr| jjjsd| d| dS |jr|jjdkrt|jjdd}|du rdS dS )z!Validate that the input is valid.zMessages cannot be empty.requiredz8Tools cannot be empty if tool choice is set to required.Nz?Tools cannot be empty if tool choice is set to a specific tool.c                 3  s    | ]	}|j j kV  qd S N)functionname.0tool	tool_namerH   rI   	<genexpr>       z6OpenAIServingChat._validate_request.<locals>.<genexpr>zTool 'z' not found in tools list.zTool z+ function has invalid 'parameters' schema: z$max_completion_tokens is too large: z.This model supports at most z completion tokens.json_schemaschema_z<schema_ is required for json_schema response format request.)r8   r:   tool_choicerl   lowertoolsr   r   any	enumerate
parametersr   check_schemar   max_completion_tokens
max_tokensrL   rS   context_lengthallow_auto_truncateresponse_formatr4   r9   r   )	r`   r2   tool_existsir   emax_output_tokensserver_context_lengthschemarH   r   rI   _validate_request   sT   

$
z#OpenAIServingChat._validate_requestNraw_requestr
   .tuple[GenerateReqInput, ChatCompletionRequest]c              	   C  s^  |j r
|j dd nd }|d ur||_	 | jjj}| ||}|j|j| j	|j
d}|r2d|ji}nt|jtr>d|ji}nd|ji}| |}| |j|j}	t|\}
}td#i |i d|jd|jd|jd|d	|jd
dd|jpdd|jddd|jd|	d|jd|jd|jd|jd|j d|j!d|j"d| #|d| $|d|j%d| &|d|d|j'd |
d!|d"t(|d"d }||fS d|jddd|jd|	d|jd|jd|jd|jd|j d|j!d|j"d| #|d| $|d|j%d| &|d|d|j'd |
d!|d"t(|d"d }||fS )$Nreasoning_effort)stopmodel_generation_configtool_call_constrainttext	input_ids
image_data
video_data
audio_datasampling_paramsreturn_logproblogprob_start_lenrf   top_logprobs_numr   streamreturn_text_in_logprobsT
modalities	lora_pathbootstrap_hostbootstrap_portbootstrap_roomdata_parallel_rankreturn_hidden_statesreturn_routed_expertsrid	extra_keyrequire_reasoningpriorityrouting_keycustom_labelscustom_logit_processorimage_max_dynamic_patchvideo_max_dynamic_patchr6   rH   ))chat_template_kwargspopr   rL   rV   is_multimodal_process_messagesto_sampling_paramsr   rX   r   promptr:   rq   rl   extract_custom_labels_resolve_lora_pathmodelr   rJ   r,   r   r   r   logprobstop_logprobsr   r   r   r   r   r   r   r   r   _compute_extra_key_get_reasoning_from_requestr   extract_routing_keyr   r9   )r`   r2   r   r   r   processed_messagesr   prompt_kwargsr   r   rF   rG   adapted_requestrH   rH   rI   _convert_to_internal_request   s   



	



	



z.OpenAIServingChat._convert_to_internal_requestr   r   c                   s   | j rd _d}d} jrT jdkrTd _t jts& fdd jD }ndd  jD }| jr>t j| j}| j} jdksIt jt	rTt
 j j}d|f}| jjdu rb|  ||}n|  |}||_|S )	z-Process chat messages and apply chat templateFNnonec                   s&   g | ]}|j j jj jkr| qS rH   )r   r   r   
model_dumpr   itemr2   rH   rI   
<listcomp>K  s
    z7OpenAIServingChat._process_messages.<locals>.<listcomp>c                 S     g | ]}|  qS rH   r   r   rH   rH   rI   r   Q      r   r   )r]   skip_special_tokensr   r   r:   rl   rT   r)   get_structure_constraintr    r+   rM   chat_template_name_apply_jinja_template_apply_conversation_templater   )r`   r2   r   r   r   parserr   resultrH   r   rI   r   <  s8   
z#OpenAIServingChat._process_messagesr   Optional[List[Dict]]c                 C  s(  d}g }g }g }g }g }	g }
| j j}| jr|jpi drdnd}|j}dd |D }|D ]}|ddu r:d|d< t|||||	|
| jd}|| q-| ||\}}|d	 d
 dkre|	d	ddd |j
rtdd |j
D |d	 d< t||d}| jj|}|r| ||}n|jD ]P}|jdu rd|_| }t|||||	|
}|d
 dkrd|v rt|d tr|d D ]}d|d v rt|d d trt|d d |d d< q|| q| ||\}}z| jjj|fdd||jd|jr|jni ddi}W nS tyV } zF|rdd |D nd}z| jjj|fdd||jd|jr-|jni ddi}W n tjyK } ztt||d}~ww W Y d}~nd}~ww |r`| ||}|rj| jj|}|j}|rr|nd}|	ry|	nd}	|r|nd}|
r|
ng }
t |||||	|
|dS )zApply Jinja chat template thinkingchatc                 S  r   rH   r   )r   rA   rH   rH   rI   r     r   z;OpenAIServingChat._apply_jinja_template.<locals>.<listcomp>r3   N)r_   r   rg   systemrj   c                 S  r   rH   r   r   rH   rH   rI   r     r   r   )thinking_moderh   
tool_calls	argumentsr   T)tokenizeadd_generation_promptr   r   return_dictFc                 S  s    g | ]}d |v r|d  n|qS )r   rH   )r   trH   rH   rI   r     s     r   rq   r   r   r   r   r   )!rM   jinja_template_content_formatr_   r   rk   r8   r.   updaterp   insertr   r   rL   rt   ru   rx   r3   r   r:   r;   rl   orjsonloadsr<   apply_chat_templater   	Exceptionjinja2TemplateError
ValueErrordecoder   r   )r`   r2   r   r   r   rq   openai_compatible_messagesr   r   r   r   template_content_formatr   r8   rA   processed_msgrn   
real_inputmessagemsg_dictr   r   template_errorr   rH   rH   rI   r   i  s  	




z'OpenAIServingChat._apply_jinja_templatec              	   C  s  d}g }t || jj}|jr|jr|jd jdkr|jr*|jd d du r*|j  | }t|j	t
rI|j	D ]}||rG|dt|  }q7nt|j	tr_||j	r_|dt|j	  }|jrr||jrr|dt|j  }t|ddr||jr|dt|j  }n| }| |r| jdvr|d7 }|jr|jnd}|jr|jnd}|jr|jnd}	|jr|jng }
t|js|j	pg ng }|jrt|jtr||j n||j |s| jj|}t|||||	|
|d	S )
zApply conversation templater   rf   rh   rs   Nsep2)qwen3zqwen3-thinkingglm4z<think>r   ) r-   rM   r   rm   r8   rg   r   
get_promptr:   stop_strr;   endswithlenrl   sepr9   r  r   rU   r   r   r   r   copy
ignore_eosr   r<   extendrL   rt   ru   r   )r`   r2   r   r   rq   conv
stop_tokenr   r   r   r   r   rH   rH   rI   r     sh   



 z.OpenAIServingChat._apply_conversation_templater   r,   r   c                   s"   t | |||d| j|dS )z(Handle streaming chat completion requestztext/event-stream)
media_type
background)r   _generate_chat_streamrL   create_abort_task)r`   r   r2   r   rH   rH   rI   _handle_streaming_requestN  s   
z+OpenAIServingChat._handle_streaming_requestAsyncGenerator[str, None]c           +   
   C s  i }i }i }i }i }i }	i }
i }i }i }i }i }z| j ||2 z3 dH W }|dd}|d d ||< |d d ||< |d dd||< |d dd||< |d d	d||< |d d
 }d}|jr||d}t|d d }||k s~|du r| ||}|||< |r|d nd}|r||
|< ||drd||< tddd}t||ddd}t|d d t	t

 |g|jd}d|  dV  ||d}|d t|d }|| ||< | jr3|jr3| |||||\}}|r3t|t|ddd}t|d d t	t

 |g|jd}|jr*|jjr*tj||d||dd|_d|  dV  |jdkrw|jrw| jrw| ||||||	2 z3 dH W }|rW|V  qK6 |durv||v rv|| }| ||||}|rv|V  q"|rt|t|ddd|d}t|d d t	t

 |g|jd}|jr|jjrtj||d||dd|_d|  dV  q"6 |
 D ]E\}}|d }|} |	|dr|dkrd} t|d d t	t

 t|t | d |v r|d  ndd!g|jdd"}!d|!  dV  q|jrJ|rJ| D ]8\}}"|"rHt|"d#kr#|"d$ ng }#t|d d t	t

 t|t|#d%ddg|jd}$d|$  dV  q|jr|rtd&d' | D d}%|%durt|d d t	t

 g |jt|%d(d)}&d|&  dV  |jr|jj rtj!||||j"| j j#j$d*}'t|d d t	t

 g |j|'d"}(d|(  dV  W n! t%y }) z| &t'|)}*d|* dV  W Y d})~)nd})~)ww d+V  dS ),z+Generate streaming chat completion responseNindexr   	meta_infoprompt_tokenscompletion_tokenscached_tokenshidden_statesrouted_expertsfinish_reasonoutput_token_logprobsr4   TFrh   r   rj   )r  deltar  r   idr!  createdchoicesr   data: 

r   )reasoning_contentr  r   r  r  r  r   r3   )r  r   r  matched_stopr   r   r   matched)r  r   r  r+  )r!  r#  r$  r   usagers   rf   )r  c                 s  s    | ]	}|d ur|V  qd S r   rH   )r   vrH   rH   rI   r   =  r   z:OpenAIServingChat._generate_chat_stream.<locals>.<genexpr>)r  )r!  r#  r$  r   sglext	n_choicesenable_cache_reportzdata: [DONE]

)(rL   generate_requestrk   r   r
  _process_streaming_logprobsr   r   r   r=   timer   model_dump_jsonrU   separate_reasoning_process_reasoning_streamstream_optionscontinuous_usage_statsr#   calculate_token_usager-  r   r   rT   _process_tool_call_stream_check_for_unstreamed_tool_argsitemsr   r   nextvaluesr   include_usagecalculate_streaming_usagenrS   r2  r   create_streaming_error_responserl   )+r`   r   r2   r   parser_dictreasoning_parser_dict	is_firstsstream_buffersn_prev_tokenshas_tool_callsfinish_reasonsr  r  r  r  r  r3   r  r  choice_logprobsn_prev_tokentotal_output_logprobsfinish_reason_typer   choice_datachunkstream_bufferreasoning_textr   remaining_chunkidxfinish_reason_datafinal_finish_reasonfinish_reason_chunkchoice_hidden_stateslast_token_hidden_stateshidden_states_chunkfirst_routed_expertsrouted_experts_chunkr-  usage_chunkr   errorrH   rH   rI   r  [  s  





	



	

  



	





z'OpenAIServingChat._generate_chat_stream<Union[ChatCompletionResponse, ErrorResponse, ORJSONResponse]c              
     s|   z| j || I dH }W n ty( } z| t|W  Y d}~S d}~ww t|ts1|g}| ||t	t

 }|S )z,Handle non-streaming chat completion requestN)rL   r3  	__anext__r   create_error_responserl   r:   r;   _build_chat_responser=   r5  )r`   r   r2   r   retr   responserH   rH   rI   _handle_non_streaming_requesta  s&   

z/OpenAIServingChat._handle_non_streaming_requestrd  r#  r=   -Union[ChatCompletionResponse, ORJSONResponse]c                 C  s  g }|d }t ||}t||}d}|s|rt||d}t|D ]\}	}
d}|jr.| |
}t|
|}|
d d }|
d }d}| j}|r|jr| j	j
pO| |}zt|d||d}||\}}W n$ ty } ztd	|  | jd
dddW  Y d}~  S d}~ww d}|jdkr|jr| jr| |}| ||j||j|\}}}t|	td|r|nd||r|ndd||r|d nd|rd|v r|d nd|d}|| q tj||j| jjjd}t |d d d ||j!||d|d d d i|dS )z6Build chat completion response from generation resultsr   N)r  cached_tokens_detailsr  r  r   F)rO   stream_reasoningforce_reasoningr2   zReasoning parsing error: z!Failed to parse reasoning contentInternalServerErrori  )err_typestatus_coder   rh   )rg   r3   r   r'  r4   r,  )r  r  r   r  r+  r  r0  r!  weight_version)r!  r#  r   r$  r-  metadatar/  )"r&   r$   r   r   r   _process_response_logprobsr%   rU   r7  rM   rj  r   r/   parse_non_streamr   rZ   r_  rb  r   r   rT   _get_history_tool_calls_cnt_process_tool_callsr   r   r<   r#   calculate_response_usagerC  rL   rS   r2  r   r   )r`   r2   rd  r#  r$  	first_retr  rh  response_sglextrU  ret_itemrL  r  r  r   rS  rU   is_force_reasoningr   r   r   history_tool_calls_cntrP  r-  rH   rH   rI   rc  z  s   



	




z&OpenAIServingChat._build_chat_responser   r   use_token_index List[ChatCompletionTokenLogprob]c              	   C  s   g }t t|j|jD ]@\}\}}t|d}g }|jr@|r!|nd}	|j|	  D ]\}
}t|
d}|t	|
||d q*|t
||||d q|S )zCommon helper to process logprobs tokens for both streaming and non-streaming

        Args:
            logprobs: LogProbs data from model
            use_token_index: True for non-streaming (use token_idx), False for streaming (use index 0)
        zutf-8r   )tokenbyteslogprob)r|  r}  r~  r   )r   ziptokenstoken_logprobsr;   ru   r   r>  r<   r!   r   )r`   r   rz  r  	token_idxr|  r~  token_bytesr   top_logprobs_idx	top_tokentop_logprobtop_token_bytesrH   rH   rI   _process_logprobs_tokens  s<   	
	z*OpenAIServingChat._process_logprobs_tokensrw  Dict[str, Any]r   c                 C  s8   t |d d |d ddd}| j|dd}t|dS )	z+Process logprobs for non-streaming responser  r  output_top_logprobsNr  r  Trz  r*  r'   rk   r  r   )r`   rw  r   r  rH   rH   rI   rp    s   

z,OpenAIServingChat._process_response_logprobs	call_itemr(   ry  c                 C  s`   | j dkrdt jdd  }|S d|j d||j  }td| j  d| d	|  |S )
z6Process for generating a new and unique `tool_call_id`kimi_k2call_N   z
functions.:zProcess tool call idx, parser: z, tool_call_id: z, history_cnt: )rT   uuiduuid4hexr   
tool_indexrZ   debug)r`   r  ry  tool_call_idrH   rH   rI   _process_tool_call_id  s   
z'OpenAIServingChat._process_tool_call_idr   r   	List[Any]r  r    Optional[Union[str, ToolChoice]]r   c                 C  s  |dkst |tr|jdkr|d dkrd|d< d|d< zDt|}g }t|D ]1\}}	t||	d tj|	d	 d
dd}
| 	|
|}|
t||t|	d tj|	d	 d
ddd q(t|d|W S  tjy } ztd|  td||W  Y d}~S d}~ww t|| j}||r|d dkrd|d< d|d< z/||\}}g }|D ]}
| 	|
|}|
t|t|
ddt|
j|
jdd qt|||W S  ty } ztd|  td||W  Y d}~S d}~ww td||S )z"Process tool calls in the responser   r   r4   r   r   Nr,  r   r   Fensure_ascii)r  r   r   r   r   r!  r  r   r   zTool call parsing error: r  )r:   r    r4   r   r   r   r(   jsondumpsr  r<   r   r   r   JSONDecodeErrorrZ   r_  r)   rT   has_tool_callrq  r9   r   r   r   )r`   r   r   r  r   ry  tool_call_datar   r   r   	call_infotool_idr   r   call_info_listrH   rH   rI   rs  0  s   



	z%OpenAIServingChat._process_tool_callsr3   rM  c                 C  sH   t |d d |d |d dg |d d}| j|dd}t|dS )	z'Process logprobs for streaming responser  r  Nr  r  Fr  r*  r  )r`   r3   rM  r   r  rH   rH   rI   r4    s   
	
z-OpenAIServingChat._process_streaming_logprobsr  r   rF  Dict[int, ReasoningParser]tuple[Optional[str], str]c                 C  sB   ||vr| j jp| |}t| j|j||||< || }||S )z/Process reasoning content in streaming response)rM   rj  r   r/   rU   ri  parse_stream_chunk)r`   r  r   rF  r3   r2   rx  rU   rH   rH   rI   r8    s   	
z+OpenAIServingChat._process_reasoning_streamc                 C  sP   t |dg }d}|D ]}|jdkr%t |dd}||dur"tt|nd7 }q
|S )a  Counts the number of tool calls in the request's message history.

        NOTE: This method is only useful for models that include self-increasing
        history tool call idx in tool calls id, such as kimi-k2

        Args:
            request: The chat completion request object.

        Returns:
            The total number of tool calls in the history, or 0 if not applicable.
        r8   r   rh   r   N)r9   rg   r
  r;   )r`   r2   r8   rU  rA   r   rH   rH   rI   rr    s   
z-OpenAIServingChat._get_history_tool_calls_cntc                 C  sv   | j sdS | j dv r|jduo|jddu S | j dv r(|j p'|jdduS | j dv r9|j p8|jdduS dS )	z)Judge whether the request needs reasoningF)zdeepseek-v3Nr   T)r  )r  glm45nano_v3interns1enable_thinking)rU   r   rk   )r`   r2   rH   rH   rI   r     s   



z-OpenAIServingChat._get_reasoning_from_requestrE  Dict[int, FunctionCallParser]rJ  Dict[int, bool]c                 C s  ||vr |j dkst|j trt ||< n
t|j| jd||< || }t|tr8|||j}|j|j	}	}
n|
|\}	}
|	rt|t|	ddd}t|d d tt |g|jd}|jr||jjr||d d	d
}|d dd
}tj||d|_d|  dV  | |}|
D ]i}d||< |jr| ||}|j}nd}d}t||jt||jdd}t|t|gddd}t|d d tt |g|jd}|jr|jjr|d d	d
}|d dd
}tj||d|_d|  dV  qdS )z(Process tool calls in streaming responser   )r   rT   r*  Nr(  r  r!  r"  r  r   r  r)  r%  r&  Tr  r  r   )r   r:   r    r*   r)   r   rT   parse_streaming_incrementnormal_textcallsr  r   r   r   r=   r5  r   r9  r:  rk   r#   r;  r-  r6  rr  r   r  r   r  r   r   )r`   r  r   rE  r3   r2   rJ  r   r   r  r  rP  rQ  r  r  ry  r  r  function_name	tool_callrH   rH   rI   r<    s   





	


z+OpenAIServingChat._process_tool_call_streamr   *Union[FunctionCallParser, JsonArrayParser]c                 C  s  t |dr|jn|}t |dr|jsdS t |dr|jsdS t|jd }|dk s0|t|jkr2dS |j| di }tj|dd	}|j| }	|	|v rR||	d
dnd
}
|
rt	d|t
d|
dd}t|t|gddd}t|d d tt |g|jd}d|  dS dS )z
        Check for any remaining tool call arguments that need to be streamed
        when generation finishes. This ensures tool calls are properly completed
        even if the model generates the final arguments in the last chunk.
        detectorprev_tool_call_arrNstreamed_args_for_toolrs   r   r   Fr  r   r  r  r  r(  r  r!  r"  r%  r&  )r\   r  r  r  r
  rk   r  r  replacer   r   r   r   r   r=   r5  r   r6  )r`   r   r3   r2   r  r  r  expected_argsexpected_callactual_callremaining_callr  rP  rQ  rH   rH   rI   r=  <  sV   
	


z1OpenAIServingChat._check_for_unstreamed_tool_args)rL   r1   rM   r0   )r8   rc   r2   r   rd   re   )rq   rr   rn   rl   rd   rr   )rd   ry   )rd   rl   )r2   r   rd   r   r   )r2   r   r   r
   rd   r   )r2   r   r   ry   rd   r   )r2   r   r   r   r   ry   rd   r   )r   r,   r2   r   r   r
   rd   r   )r   r,   r2   r   r   r
   rd   r  )r   r,   r2   r   r   r
   rd   r`  )r2   r   rd  rc   r#  r=   rd   rg  )F)r   r   rz  ry   rd   r{  )rw  r  rd   r   )r  r(   ry  r=   rd   rl   )Nr   )r   rl   r   r  r  r  r   r  ry  r=   rd   r   )r3   r  rM  r=   rd   r   )r  r=   r   rl   rF  r  r3   r  r2   r   rd   r  )r2   r   rd   r=   )r2   r   rd   ry   )r  r=   r   rl   rE  r  r3   r  r2   r   rJ  r  )
r   r  r3   r  r2   r   r  r=   rd   r   )__name__
__module____qualname____doc__rY   rR   rp   rx   r^   r   r   r   r   r   r   r  r  rf  rc  r  rp  r  rs  r4  r8  rr  r   r<  r=  __classcell__rH   rH   ra   rI   rK   W   sD    
 
&

	
3
L
- 
%
A
  

o
*


O



jrK   )r2   r   )P
__future__r   r  r  loggingr5  r  typingr   r   r   r   r   r   r	   r   r   fastapir
   fastapi.responsesr   r   
jsonschemar   r   ,sglang.srt.entrypoints.openai.encoding_dsv32r   &sglang.srt.entrypoints.openai.protocolr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   *sglang.srt.entrypoints.openai.serving_baser"   -sglang.srt.entrypoints.openai.usage_processorr#   #sglang.srt.entrypoints.openai.utilsr$   r%   r&   r'   #sglang.srt.function_call.core_typesr(   -sglang.srt.function_call.function_call_parserr)   *sglang.srt.function_call.json_array_parserr*   sglang.srt.function_call.utilsr+   sglang.srt.managers.io_structr,   sglang.srt.parser.conversationr-   &sglang.srt.parser.jinja_template_utilsr.   "sglang.srt.parser.reasoning_parserr/   $sglang.srt.managers.template_managerr0   %sglang.srt.managers.tokenizer_managerr1   	getLoggerr  rZ   rJ   rK   rH   rH   rH   rI   <module>   s>    $P

