o
    iZ                    @   s`  d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	m
Z
 d dlZd dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZmZ d d
lmZ d dlm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z) d dl*m+Z+m,Z, d dl-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6 d dl7m8Z8m9Z9m:Z: d dl;m<Z< d dl=m>Z>m?Z?m@Z@mAZAmBZBmCZCmDZD d dlEmFZF d dlGmHZHmIZI d dlJmKZK d dlLmMZM d dlNmOZO d dlPmQZQmRZR d dlSmTZT d dlUmVZV d dlWmXZX d dlYmZZZm[Z[ d dl\m]Z] d dl^m_Z_m`Z`maZambZb d dlcmdZd d dlemfZf d dlgmhZh d d limjZj d d!lkmlZl eMemZnG d"d# d#e9ZodS )$    N)AsyncGeneratorAsyncIterator)Sequence)AnyFinal)Request)Message)Allow)EngineClient)ChatTemplateContentFormatOptionConversationMessageget_history_tool_calls_cntmake_tool_call_id)RequestLogger)
ChatCompletionLogProbChatCompletionLogProbsChatCompletionLogProbsContent"ChatCompletionNamedToolChoiceParamChatCompletionRequestChatCompletionResponseChatCompletionResponseChoice"ChatCompletionResponseStreamChoiceChatCompletionStreamResponseChatMessage)
TokenStateextract_harmony_streaming_delta)	DeltaFunctionCallDeltaMessageDeltaToolCallErrorResponseFunctionCallPromptTokenUsageInfoRequestResponseMetadataToolCall	UsageInfo)GenerationErrorOpenAIServingclamp_prompt_logprobs)OpenAIServingModels)get_developer_message%get_stop_tokens_for_assistant_actions#get_streamable_parser_for_assistantget_system_message%parse_chat_inputs_to_harmony_messagesparse_chat_outputrender_for_completion) maybe_filter_parallel_tool_calls)get_max_tokensshould_include_usage)TokensPrompt)init_logger)Logprob)CompletionOutputRequestOutput)ParserManager)ReasoningParser)	TokPrompt)BeamSearchParamsSamplingParams)TokenizerLike)MistralTokenizermaybe_serialize_tool_callstruncate_tool_call_idsvalidate_request_params)
ToolParser)MistralToolCall)partial_json_loads)as_list)%validate_logits_processors_parametersc                '       s  e Zd Zddddddddddddddededed	edB d
edB dedededededededB dededededede	ee
f dB ddf& fddZdTddZdedeee ee f eB fddZ	dUded edB deedf eB eB fd!d"Zdedefd#d$ZedVd'edefd(d)Zed*ed+edeeef fd,d-Z	dUd+ed.edB d*ed/ed0edB deedB ef fd1d2Z	dUded3ee  d4ed5ed6ee d7e!d8e"de#dB deedf fd9d:Z$	dUded3ee  d4ed5ed6ee d7e!d8e"de#dB deeB fd;d<Z%d=e	ee&f d>edB d7e!dB d?edee' f
d@dAZ(		dWdBe)e d>e)e	ee&f dB  d7e!dB dCedB dDedB de*fdEdFZ+defdGdHZ,dIedB dJe-defdKdLZ.edIedMedNedefdOdPZ/	dXdedQefdRdSZ0  Z1S )YOpenAIServingChatF NT)trust_request_chat_templatereturn_tokens_as_token_idsreasoning_parserenable_auto_tools#exclude_tools_when_tool_choice_nonetool_parserenable_prompt_tokens_detailsenable_force_include_usageenable_log_outputsenable_log_deltaslog_error_stackdefault_chat_template_kwargsengine_clientmodelsresponse_rolerequest_loggerchat_templatechat_template_content_formatrI   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   rS   rT   returnc                   s0  t  j|||||d || _|| _|| _|| _|pi | _|| _|| _| j	j
| _
tj|	d| _|
| _tj||
| j	jd| _|| _|| _|| _| j	 | _| j	jjdk| _| jrid| jvr`g | jd< | jd t  t| j	dd }| j	jjdkst|tr| ddkrd| _!nd	| _!d
| _"d | _#d
| _$d | _%d S )N)rU   rV   rX   rJ   rS   )reasoning_parser_name)tool_parser_namerL   
model_namegpt_ossstop_token_idshf_overrideskimi_k2
model_typerandomF)&super__init__rW   rY   rZ   rI   rT   rQ   rR   model_configlogits_processorsr8   get_reasoning_parserreasoning_parser_clsrL   get_tool_parsermodelrN   rM   rO   rP   get_diff_sampling_paramdefault_sampling_params	hf_configrc   use_harmonyextendr*   getattrhf_text_config
isinstancedictgettool_call_id_typesupports_browsingbrowser_toolsupports_code_interpreterpython_tool)selfrU   rV   rW   rX   rY   rZ   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   rS   rT   ra   	__class__ e/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/entrypoints/openai/chat_completion/serving.pyrf   _   s\   





zOpenAIServingChat.__init__c                    s   t d t }z-tdddgddd}| j||j| j| j| j	dI dH  t | d	 }t d
| W dS  t
yF   t d Y dS w )a$  
        Warm up the chat template processing to avoid first-request latency.

        This method triggers Jinja2 template compilation and content format
        detection that would otherwise happen on the first real request,
        causing increased latency on the first request.
        z&Warming up chat template processing...userwarmuprolecontentN   )messagesrl   max_completion_tokens)default_templatedefault_template_content_formatdefault_template_kwargsi  z(Chat template warmup completed in %.1fmszChat template warmup failed)loggerinfotimeperf_counterr   _preprocess_chatr   rY   rZ   rT   	Exception	exception)r|   
start_timedummy_requestelapsedr   r   r   r      s*   

zOpenAIServingChat.warmuprequestc                    s  |  |I dH }|durtd| |S | jjr| jjz| jj}|j}| j}t	|t
r9t| t| t| |du oFt	|t
 oF| j }|rl|jdvrl|jdkr\| js\| dW S |jdkrl| d|j dW S |jdu sy|jdkr|| jr|d}nd	d
 |jD }| js| j|j|j| jd}|dur|W S | j||j| j| j| j||dI dH \}}	n|du}
| ||
\}}	W ||	fS W ||	fS  tttt j!fy } zt"d | |W  Y d}~S d}~ww )z
        render chat request by validating and preprocessing inputs.

        Returns:
            A tuple of (conversation, engine_prompts) on success,
            or an ErrorResponse on failure.
        NzError with model %s)NnoneautozV"auto" tool choice requires --enable-auto-tool-choice and --tool-call-parser to be setztool_choice="z'" requires --tool-call-parser to be setr   c                 S   s   g | ]}|  qS r   )
model_dump).0toolr   r   r   
<listcomp>  s    z9OpenAIServingChat.render_chat_request.<locals>.<listcomp>)request_chat_templatechat_template_kwargsrI   )r   r   r   
tool_dictsrN   z$Error in preprocessing prompt inputs)#_check_modelr   errorrU   errored
dead_errorrenderer	tokenizerrN   rt   r>   r?   r@   rA   rp   tool_choicerL   create_error_responsetoolsrM   _validate_chat_templaterY   r   rI   r   r   rZ   rT   _make_request_with_harmony
ValueError	TypeErrorRuntimeErrorjinja2TemplateErrorr   )r|   r   error_check_retr   r   rN   tool_parsing_unavailabler   conversationengine_promptsshould_include_toolser   r   r   render_chat_request   s   






z%OpenAIServingChat.render_chat_requestraw_requestc                    s  | j j}|dusJ d}z| jr | |j| j}| j||d}W n ty? } ztd | 	t
|W  Y d}~S d}~ww | |I dH }t|trO|S |\}}	d| ||j }
t|
d}|rh||j_z| j|dd}| j|}W n tttfy } ztd | 	|W  Y d}~S d}~ww | |}g }zt|	D ]\}}| |}t|	d	kr|
n|
 d
| }t| j|jdur|jn|j|  || j!}|j"r|#|| j!}n|$|| j%j&| j!}t'| j(| | j)||||d |du rdn| *|j+I dH }t|t,r| j-|||||d}n:|.| j%}|/ }| j0j1|||||||j2|d}d}|r>|3|j4p9g }||_5| j6j7||||||j2|||d	}|8| qW n tym } z| 	|W  Y d}~S d}~ww t|d	kswJ |\}|j9r| :|||
|||||S z| ;|||
|||||I dH W S  t<y } z| =|W  Y d}~S d}~w ty } z| 	|W  Y d}~S d}~ww )z
        Chat Completion API similar to OpenAI's API.

        See https://platform.openai.com/docs/api-reference/chat/create
        for the API specification. This API mimics the OpenAI
        Chat Completion API.
        N)r   z#Error in reasoning parser creation.z	chatcmpl-)
request_idT)supports_default_mm_lorasz"Error preparing request componentsr   _)paramslora_request)promptr   r   r   trace_headers)r   tokenization_kwargsr   prioritydata_parallel_rank)r   r   r   prompt_textr   r   )>r   r   rj   #_prepare_extra_chat_template_kwargsr   rT   r   r   r   r   strr   rt   r   _base_request_idr   r"   staterequest_metadata_maybe_get_adaptersrV   r^   r   r   _get_data_parallel_rank	enumerate_extract_prompt_textlenr1   max_model_lenr   
max_tokens_extract_prompt_lenrn   use_beam_searchto_beam_search_paramsto_sampling_paramsrg   logits_processor_patternrF   rh   _log_inputs_get_trace_headersheadersr;   beam_searchbuild_tok_paramsget_encode_kwargsinput_processorprocess_inputsr   is_reasoning_endprompt_token_idsreasoning_endedrU   generateappendstream chat_completion_stream_generatorchat_completion_full_generatorr%   %_convert_generation_error_to_response)r|   r   r   r   rK   r   r   resultr   r   r   r   r   r^   r   
generatorsiengine_promptr   sub_request_idr   sampling_paramsr   	generator
tok_paramsr   engine_requestr   result_generatorr   r   r   create_chat_completion@  s.  







	

X
z(OpenAIServingChat.create_chat_completionc                 C   s   |j r| jS |jd d S )Nr   )add_generation_promptrW   r   r|   r   r   r   r   get_chat_request_role  s   z'OpenAIServingChat.get_chat_request_role{}sc                 C   s4   d}| D ]}||kr|d7 }q||kr|d8 }q|S )zS
        Calculate the current level of nested brackets in a given string.
        r   r   r   )r   openingclosinglevelcharr   r   r   _bracket_level  s   
z OpenAIServingChat._bracket_level
delta_textprevious_textc                 C   sz   t |}d\}}| D ]-}|dkr|d7 }|dk}n|dkr&|d8 }|dk}|dkr/||7 }q|dkr8 ||fS q||fS )N)rH   Fr   r   r   r   ,)rG   r   )r   r   bracket_levelupdated_deltapassed_zerocr   r   r   _filter_delta_text  s    


z$OpenAIServingChat._filter_delta_textcurrent_textfunction_name_returnedtool_call_idxc              	   C   s  |d u s|dkrd |fS zt j}t||\}}W n tjjjtjfy.   t	
d d }Y nw |d u s>t|tr>t|dksFd}d }	|	|fS t||\}}
|d }|
sdd|vs\d|vrdd}d }	|	|fS |std|tj}|ru|d	nd}t||\}}|
rd|vr|d
 }d}t| j|d |d}tt|t|d |dt|d	 ddgd}	|	|fS t||\}}|dkrtttd |dt|d	 dgd}	|	|fS d }	|	|fS )NrH   z(not enough tokens to parse into JSON yetr   Fr   name
parametersz.*"parameters":\s*(.*)r   Tid_type	func_nameidxr  	argumentsfunction)idr  indextype
tool_callsr  r  )r	   ALLrD   partial_json_parsercore
exceptionsMalformedJSONjsonJSONDecodeErrorr   debugrt   listr   rG   r  researchDOTALLgroupr   rw   r   r   r   )r|   r   r  r   r  r  flagsobjr   delta_messagefinishes_previous_toolcurrent_tool_callparam_matchr  tool_call_idr   r   r   $extract_tool_call_required_streaming"  s   


H;
#
z6OpenAIServingChat.extract_tool_call_required_streamingr   r   r^   r   r   r   c	           Q      C  s  ddl m}	 tt }
d}d}|jd u rdn|j}dg| }dg| }d}d }| jr:dd t|D }dg| }dg| }t|jt	rK|jj
j}nd }| oT| |}dg| }| jd	krdt|}nd}d
g| }|so|rg g| }dg| }dg| }d g| }nd }z|r| jr|d u rtd| |g| }nd g| }W n' ty } ztd | |} d|  dV  dV  W Y d }~d S d }~ww |j}!t|!| j\}"}#z|2 z)3 d H W }$|$jd urt|$j}|$jd ur|t|$j7 }|r|$j}| |}%t|D ]9}&t|&t|%d
dd d d}'t|||
|'g||j r"|$jnd d}(|#r1t!|d|d|(_"|(j#dd} d|  dV  q|j$rd
})|rcd|d v rc|d %d|%krc|d d pbd
})|)rt|D ]0}&t|&t|)dd d d}'t|||
|'g|d}(|#rt!|d|d|(_"|(j#dd} d|  dV  qjd}|$j&D ]b}*|*j'}&||& }+|r|$jr||& d u r|(|$j||&< ||& rǐq|j)r|j*d ur|*j)d usJ d| j+|*j,|*j)||j*|j-d},nd },| jr,||& }-|-j.}.g }/|*j,D ]}0|-/|0 |-j0pd
}1|/1t2|-j3|-j.|1 qd
4dd |/D }2|-j3}3|3s+|2r+d}3n|*j5}2|2s=|*j,s=||& s=q|sC|rm|d usJJ |d usQJ ||& }4||& }5|4|2 }6|5rh|5t6|*j, }7nt6|*j,}7| jrt7|-|/|.|j8d\}8}9||&  |9O  < n|r|r||& s|(|5s|d usJ |9|4|6|2|5|7|*j,}8|(t6|*j,s||& rd||&< |8r|8j:r|8j:}6d |8_:nd
}6n|r|4|2 }2d
}6||& rt;t<|2d |&d!}:n't||	rt=> };nt?| j||d"};t;|;d#t<||2d$|&d%}:d||&< |d7 }t|:gd&}8d||&< nH|jd'kr|d us&J ||& }4|4|2 }6||& }<t6|*j,}=|d urJ||& sJ||& rJd||&< |rw||& sw|9|4|6|2|5|7|=}8|(|=rvd||&< |8rt|8j:rt|8j:}6d |8_:nd
}6n|6}>| j@|4|>|2|<|d(\}8||&< |8r|8jAr|8jAd jBd ur|d7 }d||&< n|r*|r*|+d usJ |d usJ |d usJ t6|*j,}=||& s||& rd||&< |=}7n)|9|4|6|2|5|7|=}8|(|=rd||&< |C|=}7|8r|8j:r|8j:}6d |8_:nd
}6||& r)|=}?||& sd||&< d
}4g }5|6}2|7}?|+jD|4|6|2|5|7|?|d)}8|8r)|8jAr)d||&< n7|rM|+d us4J |+jD|4|6|2|5|7|*j,|d)}8|8rL|8jArLd||&< n|r\|9|4|6|2|5|7|*j,}8nt|2d}8|sg|r| js|d usrJ |d usyJ |6||&< |7||&< n|d usJ ||&  |27  < ||&  t|*j,7  < |8d u r|*jEd u r|j sqt }8| jFr| jGrg }@|8j:r|@1|8j: |8jHr|8jH}A|@1d*|A d+ |8jArd
4d,d |8jAD }B|Br|@1d-|B d+ |@r| jIrd.4|@}C| jGjJ||Ct6|*j,|*jEddd/ |*jEd u r$t|&|8|,d |j rt6|*j,nd d0}'n| K|*jE| d}D|+rDt|+jLdk}D|DrAt|+jLd nd}End}E| M|8|*r|+rd}Ft|8jAd j
t<rpt|8jAd j
jNtOrpt|8jAd j
jN}FtPjQ|+jL|E %d1i dd2}G|+jR|E }H|Fdkr|Hd |F  }H|GS|Hd
d}I| T|8|I|E}8|Ds||& r|r| jr||& rd3}Jn	|*jEr|*jEnd4}Jt|&|8|,|J|*jU|j rt6|*j,nd d5}'d||&< tV|'|}'t|||
|'g|d}(|#r||& }Kt!||K||K d|(_"|(j#dd} d|  dV  qq6 |"r;tW|}Kt!||K||K d}L| jXr#|r#tY|d6|L_Zt|||
g ||Ld7}M|Mj#ddd8}Nd|N dV  tW|}Ot!||O||O d|_[| jFr{| jGr{t|D ]%}&|re|&t|k re||& nd9||&  d:}P| jGjJ||Pd d;ddd/ qUW n@ t\y } zd| ]| dV  W Y d }~n(d }~w ty } ztd< | |} d|  dV  W Y d }~nd }~ww dV  d S )=Nr   r>   zchat.completion.chunkTr   Fc                 S   s   g | ]}t  qS r   )r+   )r   r   r   r   r   r     s    zFOpenAIServingChat.chat_completion_stream_generator.<locals>.<listcomp>rb   rH   7Tokenizer not available when `skip_tokenizer_init=True`zError in tool parser creation.zdata: z

zdata: [DONE]

r   )r  deltalogprobsfinish_reason)r  objectcreatedchoicesrl   r   prompt_tokenscompletion_tokenstotal_tokens)exclude_unsetr   r   r   r   )r  r1  r2  r3  rl   Did not output logprobs)	token_idstop_logprobsr   num_output_top_logprobsreturn_as_token_idc                 s   s    | ]\}}}|V  qd S Nr   )r   r   r.  r   r   r   	<genexpr>f  s    zEOpenAIServingChat.chat_completion_stream_generator.<locals>.<genexpr>final)harmony_parsertoken_statesprev_recipientinclude_reasoning)r  r  r
  r  r  )r  r  r  r  r  required)r   r  r   r  r  )r   r  r   previous_token_idscurrent_token_idsdelta_token_idsr   z[reasoning: ]c                 s   s&    | ]}|j r|j jr|j jV  qd S r?  )r  r  r   tcr   r   r   r@    s    
[tool_calls:  r   outputsoutput_token_idsr0  is_streamingr.  )r  r.  r/  r0  r;  r  )ensure_asciir  stop)r  r.  r/  r0  stop_reasonr;  cached_tokens)r  r1  r2  r3  rl   usage)r8  exclude_nonez<streaming_complete: z tokens>streaming_completez*Error in chat completion stream generator.)^vllm.tokenizers.mistralr>   intr   nrp   rangert   r   r   r  r  %_should_stream_with_auto_tool_parsingrw   r   rN   r   r   r   r   create_streaming_error_responsestream_optionsr2   rP   r   r   encoder_prompt_token_idsnum_cached_tokensr   r   r   r   return_token_idsr$   rX  model_dump_jsonechorv   rP  r  r   r/  r<  _create_chat_logprobsr;  rJ   current_recipientprocesslast_content_deltar   r   current_channeljointextrE   r   rE  extract_reasoning_streamingr   r   r   rC   generate_random_idr   r+  r  r  extract_content_idsextract_tool_calls_streamingr0  rQ   rX   	reasoningrR   log_outputs_raise_if_errorprev_tool_call_arr,_should_check_for_unstreamed_tool_arg_tokensr  r   r  dumpsstreamed_args_for_toolreplace_create_remaining_args_deltarU  r0   sumrO   r!   prompt_tokens_detailsfinal_usage_infor%   /_convert_generation_error_to_streaming_response)Qr|   r   r   r   r^   r   r   r   rK   r>   created_timechunk_object_typefirst_iterationnum_choicesprevious_num_tokensfinish_reason_sentnum_prompt_tokensrc  harmony_parsersharmony_tools_streamedtools_streamedtool_choice_function_nametool_choice_autor  history_tool_call_cntprevious_textsall_previous_token_idsadded_content_delta_arrreasoning_end_arrprompt_is_reasoning_end_arrtool_parsersr   datara  include_usageinclude_continuous_usageresr   r   choice_datachunklast_msg_contentoutputrN   r/  rB  rD  rC  token_idtoken_deltar   cur_channelr   rG  r  rH  r&  tools_streamed_flagdelta_tool_callr*  fn_name_returnedrQ  r   rI  delta_content_partsrr  	tool_argsdelta_contentauto_tools_calledr  latest_delta_lenexpected_callactual_callremaining_callfinish_reason_r6  final_usagefinal_usage_chunkfinal_usage_datanum_completion_tokens	full_textr   r   r   r     sp  



























	









		





	


	


	       Y
	 


z2OpenAIServingChat.chat_completion_stream_generatorc	           3   
      s  ddl m}	 tt }
d }z|2 z3 d H W }|}q6 W n% tjy,   | d Y S  tyB } z| |W  Y d }~S d }~ww |d usIJ g }| jdkrUt	|}nd}| 
|}|jD ]}| |j| |j}|j}d }|jr|jd ur|d usJ d| j|||j||jd}nd }| jrt|\}}}|jsd }| jd ur|d u rtd| |}|jd||d	}|j}t||||jd
}nt|||d}t|j|||d ur|jrdn|jr|jnd|j|jrt |jnd d}|!| q_|r
|j"|j#|d\}}|js	d }nd }|j#}d}| j$|||| j%| jd\}}t&||	r't'nt(}| jr<t||||r6|ng d
}n| j%rD| jsZt&|j)t*sZ|j)dkrZt|||d}nh|j)rt+|j)t*u r|d urrt,|dkstJ g }t-|D ]9\} }!|!j.r|!||!j.|!d n!t&||	r|!||!d nt/| j|!j0|d}"|!||"|!d |d7 }qzt||d|d
}n|j)r|j)dkrg }|d urt,|dksJ t-|D ]9\} }#|#j.r|!||#j.|#d n!t&||	r|!||#d nt/| j|#j0|d}"|!||"|#d |d7 }qt|d||d}n|j)r)|j)dkr1t|||d}n|j1r|j)dksA|j)d u r| j%r| jr|d uoSt,|dk}|rg }$t-|D ]9\} }!|!j.rp|$!||!j.|!d n!t&||	r|$!||!d nt/| j|!j0|d}"|$!||"|!d |d7 }q]t||||$d
}n"|}%|rt,|dkr|}%t|||%d}nt23d t|||d}|p|j)o|j)dko|jdk}&t|j|||&rdn|jr|jnd|j|jrt |jnd d}t4||}|!| q_|j5rFd}'|r#d|d v r#|d 6d|kr#|d d p"d}'t&|'t7r3d8dd  |'D }'|D ]}(|'|(j9jp>d })|)|(j9_q5|j:d usNJ t,|j:}*|j;d ur`|*t,|j;7 }*t<d!d  |jD }+t=|*|+|*|+ d"},| j>r|j?rt@|j?d#|,_A|,|_BtC||
|||,tD|jE|jr|j:nd |jFd$}-| jGr| jHr|D ][}(d}.|(j9jr|(j9j}.n*|(j9jrg }/|(j9jD ]}!|!jI}0|/!|0j0 d%|0jJ d& qd'8|/}1d(|1 d)}.|.r d }2|(jt,|jk r|j|(j j}2| jHjK||.|2|(jddd* q|-S )+Nr   r,  zClient disconnectedrb   r:  )r;  r<  r=  r   r>  r-  rH   )r   r;  )r   rr  r   r  )r   rr  r   r  rT  )r  messager/  r0  rU  r;  )r   F)r   r   r   rL   tool_parser_clsrF  )r  r  )r  r
  r   )r   r   r  rr  r   r   z~Error in chat_completion_full_generator - cannot determine if tools should be extracted. Returning a standard chat completion.r   r   r   
c                 s   s    | ]}|d  V  qdS )rm  Nr   )r   msgr   r   r   r@    s    zCOpenAIServingChat.chat_completion_full_generator.<locals>.<genexpr>c                 s   s    | ]}t |jV  qd S r?  )r   r;  )r   r  r   r   r   r@    s    

r4  rV  )r  r2  rl   r3  rX  prompt_logprobsr   kv_transfer_params()z, rM  rJ  rO  )Lr[  r>   r\  r   asyncioCancelledErrorr   r   rw   r   r   rP  rt  r0  r;  r/  r<  rg  rJ   rp   r.   rE  rN   extract_tool_callsr   r   r  r   r  tools_calledrU  rd  rE   r   extract_reasoningrm  _parse_tool_calls_from_contentrL   rt   rC   r#   r   r   r  r   r   r  r   r  r   r   r   r0   rf  rv   r  rl  r  r   rb  r{  r$   rO   rc  r!   r|  r}  r   r'   r  r  rQ   rX   r  r  rs  )3r|   r   r   r   r^   r   r   r   rK   r>   r  	final_resr  r   r3  r  r   r  r;  out_logprobstool_call_infor/  rr  r   r   rN   r  r  r  r  tool_call_classtool_call_class_itemsr  rL  generated_id	tool_calltool_call_itemsret_contentis_finish_reason_tool_callsr  choicefull_messager  num_generated_tokensrX  responseoutput_texttool_call_descriptionsfunction_calltool_calls_strrQ  r   r   r   r   p  sL  












	








	z0OpenAIServingChat.chat_completion_full_generatorr/  r<  should_return_as_token_idc                    s"    fddt | D S )Nc              
      sf   g | ]/\}}r|k sd krt  j|d |d d t|d jdtjddddqS )	r   r   r   )r>      utf-8ry  errors)tokenlogprobbytes)r   _get_decoded_tokenmaxr  r  encode)r   r   pr|   r  r  r   r<  r   r   r     s     z7OpenAIServingChat._get_top_logprobs.<locals>.<listcomp>)r   items)r|   r/  r<  r   r  r   r  r   _get_top_logprobs  s   
z#OpenAIServingChat._get_top_logprobsr;  r=  r>  c                 C   s   g }|dur|n| j }t|D ]h\}}	|| }
|
du s"|
|	du rH|r*d|	 }n|du r2td||	}|t|t|jdddd q|
|	 }|j	}|t| 
||	||t|jd|du rddnt|jddd| |
|||d	 qt|d
S )zCreate OpenAI-style logprobs.Nz	token_id:z:Unable to get tokenizer because `skip_tokenizer_init=True`r  ry  r  )r  r  r  )r  r  r  r<  r9  )rJ   r   rv   r   decoder   r   r  r  decoded_tokenr  r  r  r  r   )r|   r;  r<  r   r=  r>  logprobs_contentr  r   r  step_top_logprobsr  
step_tokenstep_decodedr   r   r   rg    sX   	


z'OpenAIServingChat._create_chat_logprobsc                 C   s   |j o| jo| jo|jdv S )ae  
        Utility function to check if streamed tokens should go through the tool
        call parser that was configured.

        We only want to do this IF user-provided tools are set, a tool parser
        is configured, "auto" tool choice is enabled, and the request's tool
        choice field indicates that "auto" tool choice should be used.
        )r   N)r   rN   rL   r   r   r   r   r   r_  S  s   
z7OpenAIServingChat._should_stream_with_auto_tool_parsingr&  r  c                 C   sL   t |jduo$| jo$| jo$|o$|jo$|jd o$|jd jo$|jd jjduS )z
        Check to see if we should check for unstreamed tool arguments tokens.
        This is only applicable when auto tool parsing is enabled, the delta
        is a tool call with arguments.
        Nr   )boolr0  rL   rN   r  r  r  )r|   r&  r  r   r   r   rv  c  s    

z>OpenAIServingChat._should_check_for_unstreamed_tool_arg_tokensr  r  c              	      sf   t  fdd| jD d}|r|jnd}tt |r|jnd|r"|jndt|r)|jnd|ddgdS )z
        Create a delta message for remaining tool arguments, preserving
        id/type/name from the original delta.
        c                 3   s    | ]
}|j  kr|V  qd S r?  r  rK  r  r   r   r@    s    zAOpenAIServingChat._create_remaining_args_delta.<locals>.<genexpr>Nr  )r  r  r  r  r  )	nextr  r  r   r   r  r  r   r  )r&  r  r  original_tcoriginal_fnr   r  r   rz  {  s"   
z.OpenAIServingChat._create_remaining_args_deltar   c                 C   s   g }t | | jrJ | jrJ t|jd d |d}|| |jr0t|r'|jnd d}|| |t	|j
 t|}t|d}|jd urK|j|d< ||gfS )N)reasoning_effortbrowser_descriptionpython_descriptionwith_custom_tools)r   )r   
cache_salt)r?   rx   rz   r,   r  r   r   r)   rq   r-   r   r/   r3   r  )r|   r   r   r   sys_msgdev_msgr   r   r   r   r   r     s,   







z,OpenAIServingChat._make_request_with_harmony)r[   Nr?  )r   r   )NN)T)2__name__
__module____qualname__r
   r(   r   r   r   r  ru   r   rf   r   r   tupler  r   r:   r   r   r   r   r   r   r   staticmethodr\  r   r  r   r+  r   r7   r=   r"   r9   r   r   r5   r   r  GenericSequencer   rg  r_  r6   rv  rz  r   __classcell__r   r   r}   r   rG   ^   sp   
	

U'
h
 7 
m	


     x	

   


@
rG   )pr  r  r   collections.abcr   r   r   r  typingr   r   r   r  regexr   fastapir   openai_harmonyr   OpenAIMessage partial_json_parser.core.optionsr	   vllm.engine.protocolr
   vllm.entrypoints.chat_utilsr   r   r   r   vllm.entrypoints.loggerr   0vllm.entrypoints.openai.chat_completion.protocolr   r   r   r   r   r   r   r   r   r   6vllm.entrypoints.openai.chat_completion.stream_harmonyr   r   'vllm.entrypoints.openai.engine.protocolr   r   r   r   r    r!   r"   r#   r$   &vllm.entrypoints.openai.engine.servingr%   r&   r'   &vllm.entrypoints.openai.models.servingr(   ,vllm.entrypoints.openai.parser.harmony_utilsr)   r*   r+   r,   r-   r.   r/   vllm.entrypoints.openai.utilsr0   vllm.entrypoints.utilsr1   r2   vllm.inputs.datar3   vllm.loggerr4   vllm.logprobsr5   vllm.outputsr6   r7   vllm.parserr8   vllm.reasoningr9   vllm.renderers.inputsr:   vllm.sampling_paramsr;   r<   vllm.tokenizersr=   r[  r>   r?   r@   rA   vllm.tool_parsersrB   %vllm.tool_parsers.mistral_tool_parserrC   vllm.tool_parsers.utilsrD   vllm.utils.collection_utilsrE   vllm.v1.sample.logits_processorrF   r  r   rG   r   r   r   r   <module>   sP   0,$	