o
    -iK                    @   s8  d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	m
Z
 d dlZd dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZmZ d d
lmZ d dlm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z) d dl*m+Z+m,Z, d dl-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5 d dl6m7Z7m8Z8m9Z9 d dl:m;Z; d dl<m=Z=m>Z>m?Z?m@Z@mAZAmBZBmCZC d dlDmEZE d dlFmGZGmHZH d dlImJZJ d dlKmLZL d dlMmNZN d dlOmPZPmQZQ d dlRmSZSmTZT d dlUmVZV d dlWmXZXmYZYmZZZm[Z[ d dl\m]Z] d dl^m_Z_ d dl`maZa d dlbmcZc d dldmeZe eLefZgG dd  d e8ZhdS )!    N)AsyncGeneratorAsyncIterator)Sequence)AnyFinal)Request)Message)Allow)EngineClient)ChatTemplateContentFormatOptionConversationMessageget_history_tool_calls_cntmake_tool_call_id)RequestLogger)
ChatCompletionLogProbChatCompletionLogProbsChatCompletionLogProbsContent"ChatCompletionNamedToolChoiceParamChatCompletionRequestChatCompletionResponseChatCompletionResponseChoice"ChatCompletionResponseStreamChoiceChatCompletionStreamResponseChatMessage)
TokenStateextract_harmony_streaming_delta)DeltaFunctionCallDeltaMessageDeltaToolCallErrorResponsePromptTokenUsageInfoRequestResponseMetadataToolCall	UsageInfo)GenerationErrorOpenAIServingclamp_prompt_logprobs)OpenAIServingModels)get_developer_message%get_stop_tokens_for_assistant_actions#get_streamable_parser_for_assistantget_system_message%parse_chat_inputs_to_harmony_messagesparse_chat_outputrender_for_completion) maybe_filter_parallel_tool_calls)get_max_tokensshould_include_usage)TokensPrompt)init_logger)Logprob)CompletionOutputRequestOutput)BeamSearchParamsSamplingParams)TokenizerLike)MistralTokenizermaybe_serialize_tool_callstruncate_tool_call_idsvalidate_request_params)
ToolParser)MistralToolCall)partial_json_loads)as_list)%validate_logits_processors_parametersc                '       s  e Zd Zddddddddddddddededed	edB d
edB dedededededededB dededededede	ee
f dB ddf& fddZdTddZdedeee ee
 f eB fddZ	dUded edB deedf eB eB fd!d"Zdedefd#d$ZedVd'edefd(d)Zed*ed+edeeef fd,d-Z	dUd+ed.edB d*ed/ed0edB deedB ef fd1d2Zded3ee d4ed5ed6ee d7e dB d8e!deedf fd9d:Z"ded3ee d4ed5ed6ee d7e dB d8e!deeB fd;d<Z#d=e	ee$f d>edB d7e dB d?edee% f
d@dAZ&		dWdBe'e d>e'e	ee$f dB  d7e dB dCedB dDedB de(fdEdFZ)defdGdHZ*dIedB dJe+defdKdLZ,edIedMedNedefdOdPZ-	dXdedQefdRdSZ.  Z/S )YOpenAIServingChatF NT)trust_request_chat_templatereturn_tokens_as_token_idsreasoning_parserenable_auto_tools#exclude_tools_when_tool_choice_nonetool_parserenable_prompt_tokens_detailsenable_force_include_usageenable_log_outputsenable_log_deltaslog_error_stackdefault_chat_template_kwargsengine_clientmodelsresponse_rolerequest_loggerchat_templatechat_template_content_formatrE   rF   rG   rH   rI   rJ   rK   rL   rM   rN   rO   rP   returnc                   s  t  j|||||d || _|| _|| _|| _|pi | _|| _|| _| j	j
| _
| j|	d| _|
| _| j||
d| _|| _|| _|| _| j	 | _| j	jjdkrSd| _nd| _| j	jjdk| _| jrtd| jvrkg | jd< | jd t  d| _d | _d| _d | _d S )	N)rQ   rR   rT   rF   rO   )reasoning_parser_name)tool_parser_namerH   kimi_k2randomgpt_ossstop_token_idsF)super__init__rS   rU   rV   rE   rP   rM   rN   model_configlogits_processors_get_reasoning_parserrG   rH   _get_tool_parserrJ   rI   rK   rL   get_diff_sampling_paramdefault_sampling_params	hf_config
model_typetool_call_id_typeuse_harmonyextendr)   supports_browsingbrowser_toolsupports_code_interpreterpython_tool)selfrQ   rR   rS   rT   rU   rV   rE   rF   rG   rH   rI   rJ   rK   rL   rM   rN   rO   rP   	__class__ l/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/entrypoints/openai/chat_completion/serving.pyr_   [   sP   





zOpenAIServingChat.__init__c                    s   t d t }z9| jj}tdddgddd}| j|||j| j	| j
dd	ddd| jdd	d
I dH  t | d }t d| W dS  tyR   t d Y dS w )a$  
        Warm up the chat template processing to avoid first-request latency.

        This method triggers Jinja2 template compilation and content format
        detection that would otherwise happen on the first real request,
        causing increased latency on the first request.
        z&Warming up chat template processing...userwarmuprolecontentN   )messagesmodelmax_completion_tokensTF
rU   rV   add_generation_promptcontinue_final_message
tool_dicts	documentschat_template_kwargsrP   rJ   add_special_tokensi  z(Chat template warmup completed in %.1fmszChat template warmup failed)loggerinfotimeperf_counterrQ   rendererr   _preprocess_chatrz   rU   rV   rP   	Exception	exception)ro   
start_timer   dummy_requestelapsedrr   rr   rs   ru      s<   

zOpenAIServingChat.warmuprequestc                    s  |  |I dH }|durtd| |S | jjr| jjz| jj}|j}| j}t	|t
r9t| t| t| |du oFt	|t
 oF| j }|rl|jdvrl|jdkr\| js\| dW S |jdkrl| d|j dW S |jdu sy|jdkr|| jr|d}nd	d
 |jD }| js| j|j|j| jd}|dur|W S |jpi }|j|jd | j|||j|jp| j| j|j|j||j|| j ||j!dI dH \}	}
n|du}| "||\}	}
W |	|
fS W |	|
fS  t#t$t%t&j'fy } zt(d | |W  Y d}~S d}~ww )z
        render chat request by validating and preprocessing inputs.

        Returns:
            A tuple of (conversation, engine_prompts) on success,
            or an ErrorResponse on failure.
        NzError with model %s)NnoneautozV"auto" tool choice requires --enable-auto-tool-choice and --tool-call-parser to be setztool_choice="z'" requires --tool-call-parser to be setr   c                 S   s   g | ]}|  qS rr   )
model_dump).0toolrr   rr   rs   
<listcomp>  s    z9OpenAIServingChat.render_chat_request.<locals>.<listcomp>)request_chat_templater   rE   )reasoning_effortr}   z$Error in preprocessing prompt inputs))_check_modelr   errorrQ   errored
dead_errorr   	tokenizerrJ   
isinstancer:   r;   r<   r=   ri   tool_choicerH   create_error_responsetoolsrI   _validate_chat_templaterU   r   rE   updater   r   rz   rV   r~   r   r   rP   r   _make_request_with_harmony
ValueError	TypeErrorRuntimeErrorjinja2TemplateErrorr   )ro   r   error_check_retr   r   rJ   tool_parsing_unavailabler   r   conversationengine_promptsshould_include_toolserr   rr   rs   render_chat_request   s   








z%OpenAIServingChat.render_chat_requestraw_requestc                    s  |  |I dH }t|tr|S |\}}d| ||j }t|d}|r)||j_z| j|dd}| j	
|}	W n tttfyW }
 ztd | |
W  Y d}
~
S d}
~
ww | |}g }zt|D ]\}}| |\}}}t|dkrx|n| d| }| jdu ri | _t| j|t|d	 | jd
}|jr||| j}n||| jj| j}t| j| | j||||d |du rdn|  |j!I dH }t|t"r| j#|||||d}n"| j$||||||j%|dI dH \}}| j&j'||||||j%|||d	}|(| qdW n ty }
 z| |
W  Y d}
~
S d}
~
ww t|dks#J |\}| j)j*}|j+r9| ,||||	|||S z| -||||	|||I dH W S  t.y` }
 z| /|
W  Y d}
~
S d}
~
w tyw }
 z| |
W  Y d}
~
S d}
~
ww )z
        Chat Completion API similar to OpenAI's API.

        See https://platform.openai.com/docs/api-reference/chat/create
        for the API specification. This API mimics the OpenAI
        Chat Completion API.
        Nz	chatcmpl-)
request_idT)supports_default_mm_lorasz"Error preparing request componentsry   _prompt_token_ids)max_model_lenr   input_lengthre   )paramslora_request)promptr   r   r   trace_headers)r   r   prioritydata_parallel_rank)r   r   r   prompt_texttokenization_kwargsr   )0r   r   r   _base_request_idr   r!   staterequest_metadata_maybe_get_adaptersrR   
model_namer   r   r   r   r   r   _get_data_parallel_rank	enumerate_get_prompt_componentslenre   r0   r   use_beam_searchto_beam_search_paramsto_sampling_paramsr`   logits_processor_patternrB   ra   _log_inputs_get_trace_headersheadersr7   beam_search_process_inputsr   rQ   generateappendr   r   stream chat_completion_stream_generatorchat_completion_full_generatorr$   %_convert_generation_error_to_response)ro   r   r   resultr   r   r   r   r   r   r   r   
generatorsiengine_promptr   r   sub_request_id
max_tokenssampling_paramsr   	generatorengine_requestr   result_generatorr   rr   rr   rs   create_chat_completionH  s   





	

O
	z(OpenAIServingChat.create_chat_completionc                 C   s   |j r| jS |jd d S )Nrw   )r~   rS   rz   ro   r   rr   rr   rs   get_chat_request_role  s   z'OpenAIServingChat.get_chat_request_role{}sc                 C   s4   d}| D ]}||kr|d7 }q||kr|d8 }q|S )zS
        Calculate the current level of nested brackets in a given string.
        r   ry   rr   )r   openingclosinglevelcharrr   rr   rs   _bracket_level  s   
z OpenAIServingChat._bracket_level
delta_textprevious_textc                 C   sz   t |}d\}}| D ]-}|dkr|d7 }|dk}n|dkr&|d8 }|dk}|dkr/||7 }q|dkr8 ||fS q||fS )N)rD   Fr   ry   r   r   ,)rC   r   )r   r   bracket_levelupdated_deltapassed_zerocrr   rr   rs   _filter_delta_text  s    


z$OpenAIServingChat._filter_delta_textcurrent_textfunction_name_returnedtool_call_idxc              	   C   s  |d u s|dkrd |fS zt j}t||\}}W n tjjjtjfy.   t	
d d }Y nw |d u s>t|tr>t|dksFd}d }	|	|fS t||\}}
|d }|
sdd|vs\d|vrdd}d }	|	|fS |std|tj}|ru|d	nd}t||\}}|
rd|vr|d
 }d}t| j|d |d}tt|t|d |dt|d	 ddgd}	|	|fS t||\}}|dkrtttd |dt|d	 dgd}	|	|fS d }	|	|fS )NrD   z(not enough tokens to parse into JSON yetr   Fr   name
parametersz.*"parameters":\s*(.*)ry   Tid_type	func_nameidxr   	argumentsfunction)idr  indextype
tool_callsr  r  )r	   ALLr@   partial_json_parsercore
exceptionsMalformedJSONjsonJSONDecodeErrorr   debugr   listr   rC   r   researchDOTALLgroupr   rh   r   r   r   )ro   r   r   r   r   r   flagsobjr   delta_messagefinishes_previous_toolcurrent_tool_callparam_matchr   tool_call_idrr   rr   rs   $extract_tool_call_required_streaming  s   


H;
#
z6OpenAIServingChat.extract_tool_call_required_streamingr   r   r   r   r   r   c           O      C  s  t t }d}	d}
|jd u rdn|j}dg| }dg| }d}d }| jr4dd t|D }dg| }dg| }t|jtrE|jjj	}nd }| oN| 
|}dg| }| jdkr^t|}nd}d	g| }|sj| jrzg g| }dg| }dg| }nd }z| jr|d u rtd
| |j| j}| j||d}W n) ty } ztd | t|}d| dV  dV  W Y d }~d S d }~ww z|r| jr|d u rtd
| |g| }nd g| }W n( ty } ztd | |}d| dV  dV  W Y d }~d S d }~ww |j} t| | j\}!}"z|2 z!3 d H W }#|#jd ur6t|#j}|#jd ur6|t|#j7 }|
r|#j}|  |}$t|D ]9}%t!|%t"|$d	dd d d}&t#||	||&g||j$ra|#jnd d}'|"rpt%|d|d|'_&|'j'dd}d| dV  qE|j(rd	}(|rd|d v r|d )d|$kr|d d pd	}(|(rt|D ]0}%t!|%t"|(dd d d}&t#||	||&g|d}'|"rt%|d|d|'_&|'j'dd}d| dV  qd}
|#j*D ]W})|)j+}%||% }*||% rq|j,r|j-d ur|)j,d usJ d| j.|)j/|)j,||j-|j0d}+nd }+| jrU||% },|,j1}-g }.|)j/D ]}/|,2|/ |,j3p1d	}0|.4t5|,j6|,j1|0 q%d	7dd |.D }1|,j6}2|2sT|1rTd}2n|)j8}1|1sf|)j/sf||% sfq|sm| jr|d ustJ |d us{J ||% }3||% }4|3|1 }5|4r|4t9|)j/ }6nt9|)j/}6| jrt:|,|.|-|j;d \}7}8||%  |8O  < n|r5| jr||% s|<|4s|d usJ |=|3|5|1|4|6|)j/}7|<t9|)j/s|#jr|<|#jrd||%< |7r|7j>r|7j>}5d |7_>nd	}5n| jr	|3|1 }1d	}5||% rt?t@|1d!|%d"}9nt?tA d#t@||1d$|%d%}9d||%< t"|9gd&}7d||%< n_|jd'kr|d usBJ ||% }3|3|1 }5||% }:t9|)j/};| jd urm||% sm|#jrm|<|#jrmd||%< | jr||% s|=|3|5|1|4|6|;}7|<|;rd||%< |7r|7j>r|7j>}5d |7_>nd	}5n|5}<| jB|3|<|1|:|d(\}7||%< |7r|7jCr|7jCd jDd ur|d7 }d||%< n|r\| jr\|*d usJ |d usJ |d usJ |d usJ t9|)j/};||% s,|#jr|<|#jrd||%< |;}6n)|=|3|5|1|4|6|;}7|<|;r,d||%< |E|;}6|7r*|7j>r*|7j>}5d |7_>nd	}5||% r[|;}=||% sDd||%< d	}3g }4|5}1|6}=|*jF|3|5|1|4|6|=|d)}7|7r[|7jCr[d||%< n8|r|*d usfJ |*jF|3|5|1|4|6|)j/|d)}7|7r~|7jCr~d||%< n| jr|=|3|5|1|4|6|)j/}7nt"|1d}7|s| jr| js|d usJ |d usJ |5||%< |6||%< n|d usJ ||%  |17  < ||%  t|)j/7  < |7d u r|)jGd u r|j$sqt" }7| jHr?| jIr?g }>|7j>r|>4|7j> |7jJr|7jJ}?|>4d*|? d+ |7jCr#d	7d,d |7jCD }@|@r#|>4d-|@ d+ |>r?| jKr?d.7|>}A| jIjL||At9|)j/|)jGddd/ |)jGd u rXt!|%|7|+d |j$rSt9|)j/nd d0}&n| M|)jG| d}B|*rxt|*jNdk}B|Brut|*jNd nd}Cnd}C| O|7|)r|*rd}Dt|7jCd jt@rt|7jCd jjPtrt|7jCd jjP}DtQjR|*jN|C )d1i dd2}E|*jS|C }F|Ddkr|Fd |D  }F|ET|Fd	d}G| U|7|G|C}7|Bs||% r|r| jr||% rd3}Hn	|)jGr|)jGnd4}Ht!|%|7|+|H|)jV|j$rt9|)j/nd d5}&d||%< tW|&|}&t#||	||&g|d}'|"r)||% }It%||I||I d|'_&|'j'dd}d| dV  qߐq6 |!rptX|}It%||I||I d}J| jYrX|rXtZ|d6|J_[t#||	|g ||Jd7}K|Kj'ddd8}Ld|L dV  tX|}Mt%||M||M d|_\| jHr| jIrt|D ]%}%|r|%t|k r||% nd9||%  d:}N| jIjL||Nd d;ddd/ qW n@ t]y } zd| ^| dV  W Y d }~n(d }~w ty } ztd< | |}d| dV  W Y d }~nd }~ww dV  d S )=Nzchat.completion.chunkTry   r   Fc                 S   s   g | ]}t  qS rr   )r*   )r   r   rr   rr   rs   r     s    zFOpenAIServingChat.chat_completion_stream_generator.<locals>.<listcomp>rZ   rD   7Tokenizer not available when `skip_tokenizer_init=True`r   #Error in reasoning parser creation.zdata: z

zdata: [DONE]

zError in tool parser creation.rv   )r  deltalogprobsfinish_reason)r  objectcreatedchoicesr{   r   prompt_tokenscompletion_tokenstotal_tokens)exclude_unsetrx   r   rw   rx   )r  r#  r$  r%  r{   Did not output logprobs)	token_idstop_logprobsr   num_output_top_logprobsreturn_as_token_idc                 s   s    | ]\}}}|V  qd S Nrr   )r   r   r   rr   rr   rs   	<genexpr>\  s    zEOpenAIServingChat.chat_completion_stream_generator.<locals>.<genexpr>final)harmony_parsertoken_statesprev_recipientinclude_reasoning)r   r  r  r   )r  r  r  r  r  required)r   r   r   r   r   )r   r   r   previous_token_idscurrent_token_idsdelta_token_idsr   z[reasoning: ]c                 s   s&    | ]}|j r|j jr|j jV  qd S r1  )r  r   r   tcrr   rr   rs   r2    s    
[tool_calls:  r   outputsoutput_token_idsr"  is_streamingr   )r  r   r!  r"  r-  r   )ensure_asciir  stop)r  r   r!  r"  stop_reasonr-  cached_tokens)r  r#  r$  r%  r{   usage)r*  exclude_nonez<streaming_complete: z tokens>streaming_completez*Error in chat completion stream generator.)_intr   nri   ranger   r   r   r  r   %_should_stream_with_auto_tool_parsingrh   r   rG   r   #_prepare_extra_chat_template_kwargsr   rP   r   r   r   create_streaming_error_responsestrrJ   r   stream_optionsr1   rL   r   r   encoder_prompt_token_idsnum_cached_tokensr   r   r   r   return_token_idsr#   rJ  model_dump_jsonechogetrB  r  r!  r.  _create_chat_logprobsr-  rF   current_recipientprocesslast_content_deltar   r   current_channeljointextrA   r   r7  is_reasoning_endextract_reasoning_streamingrx   r   r   r   r  r  r  extract_content_idsextract_tool_calls_streamingr"  rM   rT   reasoning_contentrN   log_outputs_raise_if_errorprev_tool_call_arr,_should_check_for_unstreamed_tool_arg_tokensr   r  dumpsstreamed_args_for_toolreplace_create_remaining_args_deltarG  r/   sumrK   r    prompt_tokens_detailsfinal_usage_infor$   /_convert_generation_error_to_streaming_response)Oro   r   r   r   r   r   r   r   created_timechunk_object_typefirst_iterationnum_choicesprevious_num_tokensfinish_reason_sentnum_prompt_tokensrV  harmony_parsersharmony_tools_streamedtools_streamedtool_choice_function_nametool_choice_autor   history_tool_call_cntprevious_textsall_previous_token_idsadded_content_delta_arrreasoning_end_arrr   rG   r   datatool_parsersrT  include_usageinclude_continuous_usageresrw   r   choice_datachunklast_msg_contentoutputrJ   r!  r4  r6  r5  token_idtoken_deltar   cur_channelr   r9  r   r:  r  tools_streamed_flagdelta_tool_callfn_name_returnedrC  rx   r;  delta_content_parts	reasoning	tool_argsdelta_contentauto_tools_calledr  latest_delta_lenexpected_callactual_callremaining_callfinish_reason_r(  final_usagefinal_usage_chunkfinal_usage_datanum_completion_tokens	full_textrr   rr   rs   r   t  s  

























	








		





	


	


	       P
	 


z2OpenAIServingChat.chat_completion_stream_generatorc           .         s4  t t }d }	z|2 z3 d H W }
|
}	q6 W n% tjy&   | d Y S  ty< } z| |W  Y d }~S d }~ww |	d usCJ g }| jdkrOt|}nd}| |}|	j	D ].}| 
|j| |j}|j}d }|jr|jd ur|d us{J d| j|||j||jd}nd }| jrt|\}}}|jsd }| jd ur|d u rtd| |}|jd||d}|j}t||||jd	}nt|||d
}t|j|||d ur|jrdn|jr|jnd|j|jrt|jnd d}|| qY| j r@z|d u rtd| !|j"| j#}| j ||d}W n! t$y. } zt%&d | t'|W  Y d }~  S d }~ww |j(|j)|d\}}|js?d }nd }|j)}d}| j*|||| j+| jd\}}t,|t-r]t.nt/ | j+rg| js|t,|j0t1s||j0dkr|t|||d
}n|j0rt2|j0t1u r|d urt3|dksJ t||d fdd|D d	}n|j0r|j0dkrg }|d urt3|dksJ |D ]}| t4| j|j5|d|d |d7 }qt|d||d}nh|j0r|j0dkrt|||d
}nV|j6r?|j0dks|j0d u r?| j+r?| jr?|d uot3|dk}|r)t|||dd |D d	}n"|} |r7t3|dkr7|} t||| d
}nt%7d t|||d
}|p\|j0o\|j0dko\|jdk}!t|j|||!rgdn|jrn|jnd|j|jrzt|jnd d}t8||}|| qY|j9rd}"|rd|d v r|d :d |kr|d d pd}"t,|"t;rd!<d"d# |"D }"|D ]}#|"|#j=jpd }$|$|#j=_q|	j>d usJ t3|	j>}%|	j?d ur|%t3|	j?7 }%t@d$d# |	j	D }&tA|%|&|%|& d%}'| jBr|	jCrtD|	jCd&|'_E|'|_FtG|||||'tH|	jI|jr|	j>nd |	jJd'}(| jKr| jLr|D ]h}#d})|#j=jr=|#j=j})n7|#j=jrtg }*|#j=jD ] }+tM|+jNd(rgtM|+jNd)rg|*|+jNj5 d*|+jNjO d+ qHd,<|*},d-|, d.})|)rd }-|#jt3|	j	k r|	j	|#j j}-| jLjP||)|-|#jddd/ q/|(S )0NzClient disconnectedrZ   r   r,  )r-  r.  r/  r   r0  r  rD   )r   r-  )rw   r  rx   r  )rw   r  rx   r  rF  )r  messager!  r"  rG  r-  r  r  )r   F)r   r   rx   rH   tool_parser_clsr8  c                    s   g | ]} |d qS ))r  rr   r=  tool_call_classrr   rs   r     s    zDOpenAIServingChat.chat_completion_full_generator.<locals>.<listcomp>r   )r  r  ry   )rw   rx   r  r  r   r   c                 S   s   g | ]}t |d dqS )r  )r  r  )r"   r=  rr   rr   rs   r   5  s    z~Error in chat_completion_full_generator - cannot determine if tools should be extracted. Returning a standard chat completion.rx   r   rw   
c                 s   s    | ]}|d  V  qdS )ra  Nrr   )r   msgrr   rr   rs   r2  y  s    zCOpenAIServingChat.chat_completion_full_generator.<locals>.<genexpr>c                 s   s    | ]}t |jV  qd S r1  )r   r-  )r   r  rr   rr   rs   r2    s    

r&  rH  )r  r$  r{   r%  rJ  prompt_logprobsr   kv_transfer_paramsr   r   ()z, r?  r<  rA  )QrM  r   asyncioCancelledErrorr   r   rh   r   r   rB  rh  r"  r-  r!  r.  r[  rF   ri   r-   r7  rJ   extract_tool_callsrx   r   r  r   r  tools_calledrG  rW  rA   r   rG   rQ  r   rP   r   r   r   rS  extract_reasoningra  _parse_tool_calls_from_contentrH   r   r:   r?   r"   r   r   r  r   r   r   r   r   r/   rY  rZ  r  r`  r  r   rU  ro  r#   rK   rV  r    rp  rq  r   r&   r  r  rM   rT   hasattrr  r   rg  ).ro   r   r   r   r   r   r   r   rs  	final_resr  r   r%  r  rw   r  r-  out_logprobstool_call_infor!  r  rx   r   rJ   r  r  r   rG   r  r  tool_call_class_items	tool_callret_contentis_finish_reason_tool_callsr  choicefull_messagery  num_generated_tokensrJ  responseoutput_texttool_call_descriptionsr>  tool_calls_strrC  rr   r  rs   r   g  s  












	






	z0OpenAIServingChat.chat_completion_full_generatorr!  r.  should_return_as_token_idc                    s"    fddt | D S )Nc              
      sf   g | ]/\}}r|k sd krt  j|d |d d t|d jdtjddddqS )	r   ry   r   )r0      utf-8rm  errors)tokenlogprobbytes)r   _get_decoded_tokenmaxr  r  encode)r   r   pro   r  r  r   r.  rr   rs   r     s     z7OpenAIServingChat._get_top_logprobs.<locals>.<listcomp>)r   items)ro   r!  r.  r   r  rr   r  rs   _get_top_logprobs  s   
z#OpenAIServingChat._get_top_logprobsr-  r/  r0  c                 C   s   g }|dur|n| j }t|D ]h\}}	|| }
|
du s"|
|	du rH|r*d|	 }n|du r2td||	}|t|t|jdddd q|
|	 }|j	}|t| 
||	||t|jd|du rddnt|jddd| |
|||d	 qt|d
S )zCreate OpenAI-style logprobs.Nz	token_id:z:Unable to get tokenizer because `skip_tokenizer_init=True`r  rm  r  )r  r  r  )r  r  r  r.  r+  )rF   r   rZ  r   decoder   r   r  r  decoded_tokenr  r  r  r  r   )ro   r-  r.  r   r/  r0  logprobs_contentr  r   r  step_top_logprobsr  
step_tokenstep_decodedrr   rr   rs   r[    sX   	


z'OpenAIServingChat._create_chat_logprobsc                 C   s   |j o| jo| jo|jdv S )ae  
        Utility function to check if streamed tokens should go through the tool
        call parser that was configured.

        We only want to do this IF user-provided tools are set, a tool parser
        is configured, "auto" tool choice is enabled, and the request's tool
        choice field indicates that "auto" tool choice should be used.
        )r   N)r   rJ   rH   r   r   rr   rr   rs   rP    s   
z7OpenAIServingChat._should_stream_with_auto_tool_parsingr  r  c                 C   sL   t |jduo$| jo$| jo$|o$|jo$|jd o$|jd jo$|jd jjduS )z
        Check to see if we should check for unstreamed tool arguments tokens.
        This is only applicable when auto tool parsing is enabled, the delta
        is a tool call with arguments.
        Nr   )boolr"  rH   rJ   r  r  r   )ro   r  r  rr   rr   rs   rj  +  s    

z>OpenAIServingChat._should_check_for_unstreamed_tool_arg_tokensr  r  c              	      sf   t  fdd| jD d}|r|jnd}tt |r|jnd|r"|jndt|r)|jnd|ddgdS )z
        Create a delta message for remaining tool arguments, preserving
        id/type/name from the original delta.
        c                 3   s    | ]
}|j  kr|V  qd S r1  r  r=  r  rr   rs   r2  N  s    zAOpenAIServingChat._create_remaining_args_delta.<locals>.<genexpr>Nr   )r  r  r  r  r  )	nextr  r  r   r   r  r  r   r   )r  r  r  original_tcoriginal_fnrr   r  rs   rn  C  s"   
z.OpenAIServingChat._create_remaining_args_deltar   c                 C   s   g }t | | jrJ | jrJ t|jd d |d}|| |jr0t|r'|jnd d}|| |t	|j
 t|}t|d}|jd urK|j|d< ||gfS )N)r   browser_descriptionpython_descriptionwith_custom_tools)r   )r   
cache_salt)r;   rk   rm   r+   r   r   r   r(   rj   r,   rz   r.   r2   r  )ro   r   r   rz   sys_msgdev_msgr   r   rr   rr   rs   r   `  s,   







z,OpenAIServingChat._make_request_with_harmony)rW   Nr1  )r   r   )NN)T)0__name__
__module____qualname__r
   r'   rS  r   r   r  dictr   r_   ru   r   tupler  r   r   r   r   r   r   r   r   staticmethodrM  r   r   r   r  r   r6   r9   r!   r   r   r4   r   r  GenericSequencer   r[  rP  r5   rj  rn  r   __classcell__rr   rr   rp   rs   rC   Z   sb   
	

N1
q
  
d
	
     x	
  ^


@
rC   )ir  r  r   collections.abcr   r   r   r  typingr   r   r   r	  regexr  fastapir   openai_harmonyr   OpenAIMessage partial_json_parser.core.optionsr	   vllm.engine.protocolr
   vllm.entrypoints.chat_utilsr   r   r   r   vllm.entrypoints.loggerr   0vllm.entrypoints.openai.chat_completion.protocolr   r   r   r   r   r   r   r   r   r   6vllm.entrypoints.openai.chat_completion.stream_harmonyr   r   'vllm.entrypoints.openai.engine.protocolr   r   r   r   r    r!   r"   r#   &vllm.entrypoints.openai.engine.servingr$   r%   r&   &vllm.entrypoints.openai.models.servingr'   ,vllm.entrypoints.openai.parser.harmony_utilsr(   r)   r*   r+   r,   r-   r.   vllm.entrypoints.openai.utilsr/   vllm.entrypoints.utilsr0   r1   vllm.inputs.datar2   vllm.loggerr3   vllm.logprobsr4   vllm.outputsr5   r6   vllm.sampling_paramsr7   r8   vllm.tokenizersr9   vllm.tokenizers.mistralr:   r;   r<   r=   vllm.tool_parsersr>   %vllm.tool_parsers.mistral_tool_parserr?   vllm.tool_parsers.utilsr@   vllm.utils.collection_utilsrA   vllm.v1.sample.logits_processorrB   r  r   rC   rr   rr   rr   rs   <module>   sJ   0(
$	