o
    ۷i                    @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZmZmZ d dl	m	Z	m
Z
mZ d dlmZ d dlmZmZmZmZmZ d dlZd dlZd dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dl m!Z! d dl"m#Z#m$Z$ zd dl%Z%W n e&y   dZ%Y nw d dl'm(Z) d dl*m+Z+m,Z,m-Z-m.Z.m/Z/ d dl0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6 d dl7m8Z8 d dl9m:Z:m;Z;m<Z<m=Z=m>Z>m?Z?m@Z@mAZAmBZBmCZCmDZD d dlEmFZFmGZG d dlHmIZImJZJ d dlKmLZL d dlMmNZN d dlOmPZP d dlQmRZR d dlSmTZT d dlUmVZV d dlWmXZX d dlYmZZZ d dl[m\Z\ d dl]m^Z^ d dl_m`Z` d dl_m`Za d dlbmcZcmdZdmeZemfZf d d lgmhZh d d!limjZj d d"lkmlZl d d#lmmnZn d d$lompZp d d%lqmrZrmsZs d d&ltmuZu d d'lvmwZw d d(lxmyZy erkd d)lzm{Z{ eTe|Z}G d*d+ d+e8enZ~dS ),    N)AsyncGeneratorAsyncIteratorCallable)datetime	timedeltatimezone)BytesIO)TYPE_CHECKINGAnyFinalOptionalcast)RequestImage)TypeAdapter)BaseRenderer)	AsyncOmni)OmniChatCompletionResponse)OmniDiffusionSamplingParamsOmniTextPrompt)ChatCompletionAudio)ChatCompletionMessageParamChatTemplateContentFormatOptionConversationMessageget_history_tool_calls_cntmake_tool_call_id)"ChatCompletionNamedToolChoiceParamChatCompletionRequestChatCompletionResponseChatCompletionResponseChoice"ChatCompletionResponseStreamChoiceChatMessage)OpenAIServingChat)DeltaFunctionCallDeltaMessageDeltaToolCall	ErrorInfoErrorResponseFunctionCallFunctionDefinitionPromptTokenUsageInfoRequestResponseMetadataToolCall	UsageInfo)ChatLikeRequestclamp_prompt_logprobs)#get_streamable_parser_for_assistantparse_chat_output)ResponsesRequest) maybe_filter_parallel_tool_calls)should_include_usage)
PromptType)init_logger)RequestOutput)ReasoningParser)merge_kwargs)	TokPrompt)SamplingParams)TokenizerLike)MistralTokenizermaybe_serialize_tool_callstruncate_tool_call_idsvalidate_request_params)
ToolParser)MistralToolCall)as_list)
AudioMixin) OmniChatCompletionStreamResponse)AudioResponseCreateAudio)LoRARequest)stable_lora_int_id)OmniRequestOutput)AsyncOmniDiffusionc                   @   s  e Zd ZU dZdZeed< dZed ed< dZ	e
ed< ed	dd
e
dd fddZ	dOdededB dee
df eB eB fddZ								dPdeeB dee de
dB dedee
ef dB deee
ef  dB deegef dB dedB dededeee
e
f  dB dedeee ee  f fddZ!d ee dee" fd!d"Z#de$fd#d$Z%h d%Z&e'e
 ed&< d'e"dede"fd(d)Z(dedee" fd*d+Z)d,e
d-e*e B d.ee" dB d/e+dB ddf
d0d1Z,	dOded2e-e. d,e
d
e
d3ee d4e/d5e0d6e1dB fd7d8Z2	dOded2e-e. d,e
d
e
d3ee d4ed5e0d6e1dB dee3B fd9d:Z4	dOded;e5d4ed3ee d<e
d6e1dB fd=d>Z6	dQd;e5d<e
ded?efd@dAZ7	dQd;e5d<e
ded?efdBdCZ8	dOdededB deeB fdDdEZ9deee
ef  dee
ee
 f fdFdGZ:	H	IdRdJe
dKe
dLe$defdMdNZ;dS )SOmniOpenAIServingChata/  OpenAI-compatible chat serving for both LLM and Diffusion models.

    This class extends OpenAIServingChat to support:
    - Standard LLM chat completions
    - Diffusion model image generation via chat interface

    For diffusion mode, use the `for_diffusion` class method to create an instance.
    F_diffusion_modeNrL   _diffusion_engine _diffusion_model_namediffusion_engine
model_namereturnc                 C   s    |  | }d|_||_||_|S )a  Create a chat serving instance for diffusion models.

        Args:
            diffusion_engine: The async diffusion engine
            model_name: Name of the model being served

        Returns:
            OmniOpenAIServingChat instance configured for diffusion mode

        Note:
            Request-level parameters (num_inference_steps, guidance_scale, seed,
            height, width, num_frames, fps, etc.) are passed per-request via the API.
        T)__new__rN   rO   rQ   )clsrR   rS   instance rX   _/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm_omni/entrypoints/openai/serving_chat.pyfor_diffusionq   s
   
z#OmniOpenAIServingChat.for_diffusionrequestraw_requestc           /         s  | j r| ||I dH S | |I dH }|dur!td| |S | jjr)| jjz| j|dd}| j	
|}| j}| }|du rJ| j I dH }d}| jr^| |j| j}	| j||	d}| j}
t|trrt| t| t| |
du ot|t o| j }|r|jdvr|jdkr| js| dW S |jdkr| d	|j d
W S |jdu s|jdkr| jrd}ndd |jD }| js
| j|j|j| jd}|dur|W S |jpi }	|	j |j!d | |	| j}| j"||j#|jp| j| j$|||
||j%|j&t'|dd|j(dI dH \}}n|du}| )||\}}W n( t*t+t,t-j.fy? } zt/d | | d|j0 W  Y d}~S d}~ww d| 1||j2 }t3|d}|rV||j4_5t'|d| jj6}|dure|n| jj6|_7|j7rd|j7v rzg }|j#D ]/}t8|dr|9|:  qzt|t;r|9| qz|9t'|ddt'|ddd qz| <|\}}|s| dW S t'|ddpi }|=d }|=d!}d"|v rz%|d" }t|t>rd#|? v r|? @d#\}}tA|tA|}}W n
 tBy   Y nw |=d$}d} |r-ztCD|d% }!tEFtG|!}"d|"i} W n tBy,   d} Y nw d&|i}#|dur:||#d$< i }$|durE||$d'< |durN||$d(< |$rU|$|#d)< | dur^| |#d*< |#g}|}%|}&W n! tBy } ztHd+| d}%d}&W Y d}~n	d}~ww d}%d}&g }'zftI|D ]_\}(})t8|d,r| J|jK}*n| L|}*|%dus|&durtI|*D ]!\}+},t8|,d r|%dur|%|,_Mt8|,d!r|&dur|&|,_Nq| jO||)|*|d- | jjP|)||*|d.}-|'9|- qW n t*y } z| |W  Y d}~S d}~ww tQ|'d/ksJ |'\}.|jRr%| S||.||||||S z| T||.||||||I dH W S  t*yM } z| |W  Y d}~S d}~ww )0aU  
        Chat Completion API similar to OpenAI's API.

        See https://platform.openai.com/docs/api-reference/chat/create
        for the API specification. This API mimics the OpenAI
        Chat Completion API.

        For diffusion models, this generates images and returns them
        in a chat completion response format.
        NzError with model %sT)supports_default_mm_loras)chat_template_kwargs)NnoneautozV"auto" tool choice requires --enable-auto-tool-choice and --tool-call-parser to be setztool_choice="z'" requires --tool-call-parser to be setr_   c                 S   s   g | ]}|  qS rX   )
model_dump).0toolrX   rX   rY   
<listcomp>   s    z@OmniOpenAIServingChat.create_chat_completion.<locals>.<listcomp>)request_chat_templater^   trust_request_chat_template)reasoning_effort	documents)
default_templatedefault_template_content_formatdefault_template_kwargs
tool_dictstool_parserrendereradd_generation_promptcontinue_final_messagerh   add_special_tokensz$Error in preprocessing prompt inputs 	chatcmpl-)
request_id
modalitiesimagera   roleusercontentrP   rw   ry    No text prompt found in messages
extra_bodyheightwidthsizexnegative_promptr   prompttarget_htarget_wmm_processor_kwargsmulti_modal_dataz?Failed to build image-generation prompt for omni multistage: %ssampling_params_list)params_listlora_request)r   rt   r   output_modalities   )UrN   !_create_diffusion_chat_completion_check_modelloggererrorengine_clienterrored
dead_error_maybe_get_adaptersmodelsrS   rn   get_tokenizerreasoning_parser_cls#_prepare_extra_chat_template_kwargsr^   default_chat_template_kwargsrm   
isinstancer>   r?   r@   rA   use_harmonytool_choiceenable_auto_toolscreate_error_responsetools#exclude_tools_when_tool_choice_none_validate_chat_templatechat_templaterf   updaterg   _preprocess_chatmessageschat_template_content_formatro   rp   getattrrq   _make_request_with_harmony
ValueError	TypeErrorRuntimeErrorjinja2TemplateError	exception	__cause___base_request_idrt   r,   staterequest_metadatar   ru   hasattrappendra   dict$_extract_diffusion_prompt_and_imagesgetstrlowersplitint	Exceptionbase64	b64decoder   openr   warning	enumerate_to_sampling_params_listr   (_build_sampling_params_list_from_requestr}   r~   _log_inputsgeneratelenstream chat_completion_stream_generatorchat_completion_full_generator)/selfr[   r\   error_check_retr   rS   rn   	tokenizerreasoning_parserr^   rm   tool_parsing_unavailablerl   merged_template_kwargsconversationengine_promptsshould_include_toolsert   r   r   messages_as_dictsmsgextracted_promptreference_imagesr|   r}   r~   size_strwhr   engine_prompt_image	img_bytesimgtpromptr   _image_gen_height_image_gen_width
generatorsiengine_promptr   idxsp	generatorresult_generatorrX   rX   rY   create_chat_completion   s  





"
	












z,OmniOpenAIServingChat.create_chat_completionTr   ri   rj   rk   rl   rm   rn   ro   rp   rh   rq   c                    s  |d u r| j }t|t|||	|
|t|jtd}| j}||	|}|j
|g||fdddD dI d H \\}\}tdd pHi }|ddrt|tr|d	}|d urd
|v rd|vrddlm  g }|D ](}|dpwg D ]}t|tr|ddkr|di d}|r|| qxqo|rtj fdd|D  I d H }t||d	i d< | }|d uotdoĈjdk}|rttsd}t|||jdt| jdd }|r'd|v r'g }|D ]+}t|dr||  qt|tr|| q|t|ddt|ddd q| |\}}|r'||d< tdd }|d ur6||d< td rGjd urGj|d < ||gfS )!N)r   rh   ro   rp   rq   tokenizec                    s&   i | ]}t  |d  d ur|qS N)r   )rb   k)r[   vrX   rY   
<dictcomp>  s     z:OmniOpenAIServingChat._preprocess_chat.<locals>.<dictcomp>)r   
cache_salt)prompt_extrasr   use_audio_in_videoFr   videoaudior   extract_audio_from_video_asyncry   type	video_urlurlc                 3   s    | ]} |V  qd S r   rX   )rb   ur   rX   rY   	<genexpr>      z9OmniOpenAIServingChat._preprocess_chat.<locals>.<genexpr>r   r_   z5Tool usage is only supported for Chat Completions APIr[   r   rv   ra   rw   rx   rP   rz   r   r   )rn   r:   r   r   r   r>   build_tok_paramsmodel_configbuild_chat_paramswith_defaultsrender_chat_asyncr   r    vllm_omni.entrypoints.chat_utilsr   r   asynciogatherlist
setdefaultr   r   r   r   NotImplementedErroradjust_requestr   ra   r   r   )r   r[   r   ri   rj   rk   rl   rm   rn   ro   rp   rh   rq   
tok_paramschat_paramsr   r   
mm_proc_kwmm_data
video_urlsr   partr   audiosr   should_parse_toolsr   r   r   _r   rX   )r   r[   r   rY   r     s   









z&OmniOpenAIServingChat._preprocess_chatr   c                 C   sT   g }|D ]#}t |tr|tdi | qt |tr!|| qtd| |S )NzInvalid sampling params: rX   )r   r   r   r<   r   )r   r   final_sampling_params_listsampling_paramsrX   rX   rY   r     s   

z.OmniOpenAIServingChat._to_sampling_params_listc                 C   s,   t | jjD ]\}}|jr|  S qtd)NzBNo comprehension stage (is_comprehension=True) found in stage_list)r   r   
stage_listis_comprehensionr   )r   r   stagerX   rX   rY   _get_comprehension_stage_index$  s
   z4OmniOpenAIServingChat._get_comprehension_stage_index>   seedstoptop_ktop_p
ignore_eos
max_tokens
min_tokenstemperaturestop_token_idspresence_penaltyfrequency_penalty_OPENAI_SAMPLING_FIELDSdefault_paramsc                 C   s8   |  }| jD ]}t||d}|durt||| q|S )a  Clone default params and override with user-provided request values.

        Starts with YAML defaults and only overrides fields that the user
        explicitly provided (non-None values) in the request.

        Args:
            default_params: Default SamplingParams from stage config YAML.
            request: The chat completion request containing user-provided values.

        Returns:
            New SamplingParams with YAML defaults overridden by request values.
        N)cloner!  r   setattr)r   r"  r[   params
field_namevaluerX   rX   rY   _apply_request_overrides<  s   
z.OmniOpenAIServingChat._apply_request_overridesc                 C   sp   | j j}|  }g }t|D ]'\}}t|trtdi |}||kr.| ||}|| q||	  q|S )a;  Build sampling_params_list using standard OpenAI API parameters.

        For the comprehension stage, starts with YAML defaults and overrides with
        user-provided request values. For other stages, uses cloned YAML defaults.

        This approach ensures all YAML defaults (including seed, detokenize, etc.)
        are preserved while allowing users to override specific parameters.

        Args:
            request: The chat completion request containing OpenAI API parameters.

        Returns:
            List of SamplingParams, one for each stage.
        NrX   )
r   default_sampling_params_listr  r   r   r   r<   r(  r   r#  )r   r[   default_params_listcomprehension_idxr   r   r"  r%  rX   rX   rY   r   V  s   
z>OmniOpenAIServingChat._build_sampling_params_list_from_requestrt   inputsr   r   c                 C   s:   | j d u rd S | |}| j j||j|j|j||d d S )N)r%  r   )request_logger_extract_prompt_components
log_inputstext	token_idsembeds)r   rt   r,  r   r   
componentsrX   rX   rY   r   x  s   


z!OmniOpenAIServingChat._log_inputsr   r   r   r   r   c	           Q      C  s  t t }	d}
i }t|dr|jd usJ d|jD ]}d||< q|jd u r*dn|j}dg| }dg| }d}d }| jrLdd	 t|D }dg| }dg| }t|jt	r]|jj
j}nd }| of| |}dg| }| jd
krvt|}nd}dg| }|s|rg g| }dg| }dg| }nd }z|r| jr| |g| }nd g| }W n' ty } ztd | |}d| dV  dV  W Y d }~d S d }~ww |j} t| | j\}!}"d }#z`|2 z3 d H W }$|$j}%|$j}&|%|vrtd|% d q|$jr|$j}#|&jd urt|&j}|&jd ur|t|&j7 }| |}'||% r|%dkr|&j}t|D ]:}(t |(t!|'ddd d d})t"||
|	|)g||j#rM|&jnd |%d}*|"r]t$|d|d|*_%|*j&dd}d| dV  q1|j'rd}+|rd|d v r|d (d|'kr|d d pd}+|+rt|D ]1}(t |(t!|+dd d d})t"||
|	|)g||%d}*|"rt$|d|d|*_%|*j&dd}d| dV  qd||%< |%dkr|&j)D ]},|,j*}(||( }-||( rq|j+r|j,d ur|,j+d usJ d| j-|,j.|,j+||j,|j/d}.nd }.| jr2||( }/|/j0}0d}1|,j.D ]}2|/1|2 |1|/j2p'd7 }1q|/j3}3|/j0}4n|r9||( nd}5|,j4d urK|,j4t|5d  }1nd}1|1s[|,j.s[||( s[q|sa|r|d ushJ |d usoJ ||( }5||( }6|5|1 }7|6r|6t5|,j. }8nt5|,j.}8| jr|3dkrt!|1d}9nl|3d kr|j6rt!|1d!}9n\d }9nX|3d"kr|4r|47d#rd}:|/j8D ]};|;j9d"kr|;j:r|;j:7d#r|:d7 }:q|0|4kr|4;d#dd }<t!t<t= d$t>|<dd%|:d&gd'}9n|1rt!t<|:t>|1d(d)gd'}9nd }9|9d urd||(< nd }9n|r|rj||( sj|?|6sj|d us4J |@|5|7|1|6|8|,j.}9|?t5|,j.sS|&jrh|?|&jrhd||(< |9rf|9jArf|9jA}7d |9_And}7n|rs|5|1 }1d}7||( rt<t>|1d(|(d*}=nt<t= d$t>||1d%|(d&}=d||(< t!|=gd'}9d||(< nh|jd+kr.|d usJ ||( }5|5|1 }7||( }>t5|,j.}?|d ur||( s|&jr|?|&jrd||(< |r||( s|@|5|7|1|6|8|?}9|?|?rd||(< |9r|9jAr|9jA}7d |9_And}7n|7}@| jB|5|@|1|>|d,\}9||(< |9r-|9jCr-|9jCd jDd ur-|d7 }d||(< n|r|r|-d us;J |d usBJ |d usIJ |d usPJ t5|,j.}?||( s|@|5|7|1|6|8|?}9|&jr|?|&jrd||(< |?}8|9r|9jAr|9jA}7d |9_And}7|?|?rd||(< |E|?}8|9r|9jAr|9jA}7d |9_Aned}7nb|?}A||( sd||(< d}5g }6|7}1|8}A|-jF|5|7|1|6|8|A|d-}9|9r|9jCrd||(< n7|r|-d usJ |-jF|5|7|1|6|8|,j.|d-}9|9r|9jCrd||(< n|r|@|5|7|1|6|8|,j.}9nt!|1d}9|s|r(| js(|d usJ |d usJ |7||(< |8||(< n|d us/J ||(  |17  < ||(  t|,j.7  < |9d u rV|,jGd u rS|j#sSqt! }9| jHr| jIrd}B|9jArh|9jA}Bn|9jCrwdJd.d/ |9jCD }B|Br| jIjK||Bt5|,j.|,jGddd0 |,jGd u rt |(|9|.d |j#rt5|,j.nd d1})nd}C|-rt|-jLdk}C|Crt|-jLd nd}Dnd}D| M|9|,r|-rd}Et|9jCd j
t>rt|9jCd j
jNtOrt|9jCd j
jN}EtPjQ|-jL|D (d2i dd3}F|-jR|D }G|Edkr|Gd |E  }G|FS|Gdd}Ht!t<|Dt>|Hd(jTdd4d)gd'}9|Cs3||( r*|r3| jr6||( r6d5}In	|,jGr=|,jGnd6}It |(|9|.|I|,jU|j#rOt5|,j.nd d7})d||(< tV|)|})t"||
|	|)g||%|$jd8}*|"rz||( }Jt$||J||J d|*_%|*j&dd}d| dV  qq|%d9kr| |}'| jW|$|'|dd:}Kt"||
|	|K||%d}*t$|d|d|*_%|*j&dd}d| dV  qtd;|%  q6 |!rtX|}Jt$||J||J d}L| jYr|rtZ|d<|L_[t"||
|	g ||L|#d=}M|Mj&ddd>}Nd|N dV  tX|}Ot$||O||O d|_\| jHr=| jIr=t|D ]%}(|r'|(t|k r'||( nd?||(  d@}P| jIjK||Pd dAddd0 qW n$ tyb } ztdB | |}d| dV  W Y d }~nd }~ww dV  d S )CNzchat.completion.chunkru   z0Streaming request must specify output modalitiesTr   r   Fc                 S   s   g | ]}t  qS rX   )r1   )rb   r  rX   rX   rY   rd     s    zJOmniOpenAIServingChat.chat_completion_stream_generator.<locals>.<listcomp>kimi_k2rP   Error in tool parser creation.zdata: z

zdata: [DONE]

final output type:  is not needed by the requestr0  rz   )indexdeltalogprobsfinish_reason)idobjectcreatedchoicesmodelprompt_token_idsmodalityprompt_tokenscompletion_tokenstotal_tokens)exclude_unsetry   rw   )ry   )r<  r=  r>  r?  r@  rB  Did not output logprobs)r1  top_logprobsr   num_output_top_logprobsreturn_as_token_idfinalanalysis)	reasoning
commentaryz
functions.functionname	arguments)r<  r   rQ  r8  )
tool_calls)rT  )r8  rQ  )rQ  r8  required)previous_textcurrent_text
delta_textfunction_name_returnedtool_call_idx)rW  rX  rY  previous_token_idscurrent_token_idsdelta_token_idsr[   c                 s   s&    | ]}|j r|j jr|j jV  qd S r   )rQ  rT  )rb   tcrX   rX   rY   r     s    
zIOmniOpenAIServingChat.chat_completion_stream_generator.<locals>.<genexpr>rt   outputsoutput_token_idsr;  is_streamingr9  )r8  r9  r:  r;  r1  rT  ensure_ascii)exclude_nonerU  r  r8  r9  r:  r;  stop_reasonr1  )r<  r=  r>  r?  r@  rB  metricsr   r   z)Unsupported streaming final output type: cached_tokens)r<  r=  r>  r?  r@  usageri  )rG  rf  z<streaming_complete: z tokens>streaming_completez*Error in chat completion stream generator.)]r   timer   ru   nr   ranger   r   r   rQ  rS  %_should_stream_with_auto_tool_parsingtool_call_id_typer   rm   r   r   r   create_streaming_error_responsestream_optionsr5   enable_force_include_usagefinal_output_typerequest_outputr   ri  rA  r   encoder_prompt_token_idsget_chat_request_rolenum_cached_tokensr!   r%   rF   return_token_idsr.   rm  model_dump_jsonechor   ra  r8  r:  rJ  _create_chat_logprobsr1  return_tokens_as_token_idscurrent_recipientprocesslast_content_deltacurrent_channelr0  rD   include_reasoning
startswithr   channel	recipientr   r&   r   r$   is_reasoning_endextract_reasoning_streamingry   $extract_tool_call_required_streamingrU  r<  extract_content_idsextract_tool_calls_streamingr;  enable_log_outputsr-  joinlog_outputsprev_tool_call_arr,_should_check_for_unstreamed_tool_arg_tokensrT  r   jsondumpsstreamed_args_for_toolreplacera   rh  r4   _create_audio_choicesumenable_prompt_tokens_detailsr+   prompt_tokens_detailsfinal_usage_info)Qr   r[   r   rt   rS   r   r   r   r   created_timechunk_object_typefirst_iteration_dictrB  num_choicesprevious_num_tokensfinish_reason_sentnum_prompt_tokensr{  harmony_parsersharmony_tools_streamedtools_streamedtool_choice_function_nametool_choice_autorZ  history_tool_call_cntprevious_textsall_previous_token_idsadded_content_delta_arrreasoning_end_arrtool_parsersr   dataru  include_usageinclude_continuous_usagelast_metricsomni_resrw  resrw   r   choice_datachunklast_msg_contentoutputrm   r:  harmony_parserprev_recipientrY  token_idcur_channelcur_recipientrW  r\  rX  r]  delta_message
base_indexr   	tool_namedelta_tool_callfn_name_returnedrb  ry   r^  delta_contentauto_tools_calledr8  latest_delta_lenexpected_callactual_callremaining_callfinish_reason_rE  choices_datafinal_usagefinal_usage_chunkfinal_usage_datanum_completion_tokens	full_textrX   rX   rY   r     sp  

















(









	



	







		




	



	
      
W
        O	
	


z6OmniOpenAIServingChat.chat_completion_stream_generatorc	                    s  t t }	d }
g }z|2 z3 d H W }|| q6 W n% tjy+   | d Y S  tyA } z| |W  Y d }~S d }~ww |d usHJ g }tdddd}| |}d }d }d }d }t	|drk|j
rkt|j
nd }|D ]m}g }|jd urt|jddsqo|d ur|j|vrtd|j d qo|jd	kr| ||||||\}}}}}n(|jd
kr| j|||dd}n|jdkr| j|||dd}n
td|j  qo|jr|j}|| qot||	|||||||d	}| jr\| jr\|D ]g}d}|jjr|jj}n7|jjr9g }|jjD ] }t	|jdr,t	|jdr,||jj d|jj d qd|}d| d}|r[d }|j t!|
j"k rN|
j"|j  j#}| jj$||||j%ddd q|S )NzClient disconnectedr   rC  ru   finishedFr6  r7  r0  r   rj  rv   zUnsupported final output type: )	r<  r>  r@  r?  rm  prompt_logprobsrA  kv_transfer_paramsri  rP   rS  rT  ()z, z[tool_calls: ]r`  )&r   ro  r   r  CancelledErrorr   r   r.   rz  r   ru   setrx  r   rw  r   r   _create_text_choicer  _create_image_choiceri  extendr   r  r-  messagery   rU  rQ  rS  rT  r  r8  r   ra  r1  r  r;  )r   r[   r   rt   rS   r   r   r   r   r  	final_resfinal_outputsr  r   r?  rm  rw   r  rA  r  response_metricsrequested_modalitiesomni_outputsr  responsechoiceoutput_texttool_call_descriptionsr_  tool_calls_strrb  rX   rX   rY   r   Z  s   


	




	z4OmniOpenAIServingChat.chat_completion_full_generatorr  rw   c           #         s$  |j }| jdkrt|}nd}g }	|jD ]}
|
j}|
j}d }|jr<|jd ur<|d us/J d| j|||j||jd}nd }| j	rt
|\}}}|jsMd }| jd url| |}|jd||d}|j}t||||jd}nt|||d}t|
j|||d ur|jrd	n|
jr|
jnd
|
jd}|	| q|r|j|
j|d\}}|jsd }nd }|
j}d}| jr| jst|jts|jdkrt|||d}n|jrt|jtu rt|trtnt  t||d t!|jj"j#|ddgd}n|jr;|jdkr;t|trtnt  |d usJ t$t%t& '|}g |D ]}t(| j|j#|d |d7 }qt|d fddt)|D |d}n|jrE|jdkrMt|||d}n|j*r|jdks]|jd u r| jr| jrz| |}W n t+y } zt,-d | .|W  Y d }~  S d }~ww |j|d ur|nd|d}|j}|jrt|||j|jd}n%|}|jrt/|jdkr|j}t|||d}nt,0d t|||d}t|
j|||rd	n|
jr|
jnd
|
j|j1rt2|
jnd d}|	| q|j3r>d}|rd|d v r|d 4d|kr|d d pd}t|t%r+d5dd  |D }|	D ]}||j6jp6d }||j6_q-|j7d usFJ t/|j7}|j8d urX|t/|j87 }t9d!d  |jD }t:|||| d"}| j;rz|j<rzt=|j<d#|_>t?|j@} |j1r|j7nd }!|jA}"|	|| |!|"fS )$Nr4  r   rI  )r1  rJ  rK  r   rL  rP   )r[   r1  )rw   reasoning_contentry   rU  )rw   r  ry   rU  r  r8  r  r:  r;  rh  r   FrV  rR  )rQ  )id_type	func_namer   r   c              
      s6   g | ]\}} | t |jtj|jd dddqS )Frd  rR  )r<  rQ  )r)   rS  r  r  
parameters)rb   r   	tool_calltool_call_classtool_call_idsrX   rY   rd   T  s    z=OmniOpenAIServingChat._create_text_choice.<locals>.<listcomp>)rw   ry   rU  r  r_   r`   r5  z~Error in chat_completion_full_generator - cannot determine if tools should be extracted. Returning a standard chat completion.)r8  r  r:  r;  rh  r1  ry   rH  rw   
c                 s   s    | ]}|d  V  qdS )r0  NrX   )rb   r   rX   rX   rY   r     r   z<OmniOpenAIServingChat._create_text_choice.<locals>.<genexpr>c                 s   s    | ]}t |jV  qd S r   )r   r1  )rb   r  rX   rX   rY   r     s    rC  rk  )Brx  rs  r   ra  r1  r:  rJ  r  r  r   r2   r  rm   extract_tool_callsry   r"   rU  r    r8  tools_calledr;  rh  r   extract_reasoningr0  r   r   r   r   r   r>   rC   r-   r)   rQ  rS  r   r  r*   validate_jsonr   r   r   r   r   r   r   r   r   r|  rD   r~  r   r  r  rA  ry  r  r.   r  r{  r+   r  r0   r  r  )#r   r[   r  r   r   rw   r   r  r  r?  r  r1  out_logprobstool_call_infor:  r  ry   r  rm   r  r  r  rU  r  r   ret_contentr  r  full_messager  num_generated_tokensrm  r  rA  r  rX   r  rY   r    sZ  	










(

z)OmniOpenAIServingChat._create_text_choicer   c              	   C   sD  g }|j }|jd jd}t|tr"|r|d }n
tj|dd}n|}| 	 
  }|jdkr7| }t|ddddd	d
}	| |	}
|
j}dt jd d  }tttjtdd  }t|||dd}	|jD ]0}|rt|jt||dd d|j|j rt!|j"nd d}nt#|jt$||	dd dd d}|%| qo|S )Nr   r   rH  )dimr   i]  wav      ?T)audio_tensorsample_rateresponse_formatspeedstream_formatbase64_encodezaudio-      )hoursrP   )r<  r  
expires_at
transcriptrz   r  rg  )rw   r   r  )&rx  ra  multimodal_outputr   r   r  torchcatfloatdetachcpunumpyndimflattenrH   create_audio
audio_datauuiduuid4hexr   r   nowr   utcr   	timestampOpenAIChatCompletionAudior!   r8  r%   rh  r|  rD   r1  r    r"   r   )r   r  rw   r[   r   r?  r  r  r  	audio_objaudio_responseaudio_base64audio_idr  r  r  rX   rX   rY   r    s`   



	

	
z*OmniOpenAIServingChat._create_audio_choicec              	   C   s  ddl m} g }|j}g }|jr|j}n|dur|jr|jd }	t|	dr|	jr|	jd}
|
durt|
|jr>|	|
 nt|
drddl
}|
   
 }|jdkrc|jd dv rc||d	}| d
krr|d |j}n||j}|jdkr|	|j|dd nD|jd dkr|	|j|ddd n/|jd dkr|	|j|dd n|jd dkr|	|j|dd nt|dr|jr|j}g }|D ]5}t }|j|dd | }W d   n1 sw   Y  t|d}|	ddd| id qt|dkr|}nt|dkr|}ndddg}ddl}| ) |jdt d d! t!j"|d"}t#$|d#| t|d$rJ|j%&d# W d   n	1 sUw   Y  t'd|dd%dd&}|	| |S )'a  Create chat completion response choices for image output.

        Converts image tensor or PIL Image output from diffusion models
        into base64-encoded image data for API response.

        Args:
            omni_outputs: Output containing image data from diffusion stage
            role: The role for the response message (e.g., "assistant")

        Returns:
            List of ChatCompletionResponseChoice with image content
        r   r   Nr  rv   r     )r   r     )r      r   r     r  L)moderH  r   RGBr  RGBAimagesPNGformatutf-8	image_urlr   data:image/png;base64,r   r)  r0  7Image generation completed but no images were produced.)r   r0  ignorepydanticcategorymodulerw   ry   __pydantic_fields_set__r  r  )(PILr   rx  r$  ra  r   r  r   r   r   r  r	  r
  r  r  shape	transposemaxastypeuint8	fromarraysqueezer   savegetvaluer   	b64encodedecoder   warningscatch_warningsfilterwarningsUserWarningr"   model_constructr=  __setattr__r3  addr    )r   r  rw   r[   r   r   r?  r  r$  completion_output
image_datanp	img_arrayimage_contentsr   bufferr   
img_base64ry   warnings_moduler  r  rX   rX   rY   r    s   







z*OmniOpenAIServingChat._create_image_choicec           2         s  zdt  jdd  }tt }g }|jD ]*}t|dr)||  qt	|t
r4|| q|t|ddt|ddd	 q| |\}}|sT| d
W S t|ddp[i }	|	d}
|	d}d|	v rz#|	d }t	|trd| v r| d\}}t|t|}}
W n ty   td|	d Y nw |	dd}|	d}|	d}|	d}|	d}|	dd}|	d}|	d}|	d}td|t|dkr|dd d n|t|dd |	 D  g }|D ],}zt|}|tt| W q ty } ztd | W Y d}~qd}~ww ||d!}t||
|||d"}|dur6||_|dur>||_ |durF||_!|durN||_"|rt	|t
rzu|d#ph|d$ph|d%}|d&p|d'p|d(p|d)}|d*} | du r|d+} |d,}!|!du r|d-}!|!du r|rt#t|}!|r|rt$t|t|!t|}"|"|_%| durt&| |_'W n ty } ztd.| W Y d}~nd}~ww |r't|dkri |d/< |d0 |d/ d1< n*t| j(d2d}#t|#d3d4}$|#du rd5}$|$ri |d/< ||d/ d1< n| jd6d7d8W S t| j(d9rVt)t*| j(}%d}&|%j+||g|d:2 z	3 dH W }'|'}&q?6 |&du rU| d;W S nt)t,| j(}%|%j+|||d<I dH }&t|&j-d=g }(g })|(D ]7}*t }+|*j.|+d>d? |+/ }W d   n	1 sw   Y  t0|1d@},|)dAdBdC|, idD qr|)sdE}-n|)}-d0dl2}.|.3 ) |.j4dFt5dGdH t6j7dIdJ}/t89|/d|- t|/dKr|/j:;d W d   n	1 sw   Y  t<j7d0|/dLdddM}0t=||| j>|0gt?t| dt| d dNdO}1tdP|t|( |1W S  ty@ } zt@dQ| | jdRt| dSd8W  Y d}~S d}~ww )Ta  Generate images via chat completion interface for diffusion models.

        Args:
            request: Chat completion request
            raw_request: Raw FastAPI request object

        Returns:
            ChatCompletionResponse with generated images or ErrorResponse
        rs   Nr  ra   rw   rx   ry   rP   rz   r{   r|   r}   r~   r   r   zInvalid size format: %snum_inference_steps2   guidance_scaletrue_cfg_scaler  r   num_outputs_per_promptr   
num_framesguidance_scale_2loraz>Diffusion chat request %s: prompt=%r, ref_images=%d, params=%sz...c                 S   s   i | ]\}}|d ur||qS r   rX   )rb   r   r   rX   rX   rY   r     s    zKOmniOpenAIServingChat._create_diffusion_chat_completion.<locals>.<dictcomp>z$Failed to decode reference image: %s)r   r   )rO  r}   r~   rS  r  rS  	lora_nameadapter
local_pathpath	lora_pathlora_local_pathscale
lora_scaleint_idlora_int_idz Failed to parse LoRA request: %sr   r   rv   	od_configsupports_multimodal_inputsFTzMultiple input images are not supported by the current diffusion model. For multi-image editing, start the server with Qwen-Image-Edit-2509 and send multiple images in the user message content.  )status_coder  )r   r   rt   z"No output generated from AsyncOmni)r   r  rt   r$  r%  r&  r(  r)  r   r*  r+  r,  r-  r.  r/  	assistantr2  r3  r  )r8  r  r;  r:  rh  rC  )r<  r>  r@  r?  rm  z2Diffusion chat completed for request %s: %d imagesz$Diffusion chat completion failed: %szImage generation failed: i  )Ar  r  r  r   ro  r   r   r   ra   r   r   r   r   _create_error_responser   r   r   r   r   r   r   infor   itemsr   r   r   r   r   r   r   rQ  rR  rT  rU  rJ   rI   r   r	  r^  rO   r   r   r   rL   rx  r<  r=  r>  r?  r@  rA  rB  rC  r"   rD  r=  rE  r3  rF  r    r   rQ   r.   r   )2r   r[   r\   rt   r  r   r   r   r   r|   r}   r~   r   r   r   rO  rQ  rR  r  r   rS  rT  rU  	lora_body
pil_imagesimg_b64r   r   
gen_prompt
gen_paramsrW  r[  r^  r`  lora_reqra  rb  rR   resultr  r$  rK  r   rL  rM  ry   rN  r  r  r  rX   rX   rY   r   s  s|  


"








	




"








	





z7OmniOpenAIServingChat._create_diffusion_chat_completionc              
   C   sV  g }g }|D ]}| dd}|dkrq| dd}t|tr$|| qt|tr|D ]s}t|tr8|| q+t|tr| ddkrN|| dd q+d|v r^d|vr^||d  q+| ddkr| di  dd}|d	rz|d
d\}	}
||
 W q+ ty   t	
d Y q+w q+d|v r||d  q+qd| }||fS )zExtract text prompt and base64 images from chat messages.

        Args:
            messages: List of chat messages

        Returns:
            Tuple of (prompt_text, list_of_base64_images)
        rw   rP   rx   ry   r   r0  r)  r   z
data:image,r   zInvalid data URL formatrv   rr   )r   r   r   r   r  r   r  r   r   r   r   r  strip)r   r   prompt_partsr$  r  rw   ry   itemr   r  b64_datar   rX   rX   rY   r   n  sD   





z:OmniOpenAIServingChat._extract_diffusion_prompt_and_imagesBadRequestErrorrc  r  err_typerd  c                 C   s   t t|||ddS )z7Create an error response following OpenAI error format.)r  r   code)r   )r(   r'   )r   r  rv  rd  rX   rX   rY   rf    s   z,OmniOpenAIServingChat._create_error_responser   )NNNNTFNF)F)ru  rc  )<__name__
__module____qualname____doc__rN   bool__annotations__rO   r   rQ   r   classmethodrZ   r   r   r   r   r(   r   r/   r3   r  r   r   r   r
   r   r=   rB   r   tupler   r;   r   r<   r   r   r  r!  r  r(  r   r6   rI   r   r   r8   AnyTokenizerr,   r9   r   r   r   rK   r  r  r  r   r   rf  rX   rX   rX   rY   rM   b   sv  
 

   

w


"

	
     ]	

~
 r
E
q
 |
:rM   )r  r   r  ro  r  collections.abcr   r   r   r   r   r   ior   typingr	   r
   r   r   r   r   r  fastapir   r4  r   r.  r   vllm.renderers.protocolr    vllm_omni.entrypoints.async_omnir   5vllm_omni.entrypoints.openai.protocol.chat_completionr   vllm_omni.inputs.datar   r   	soundfileImportError'openai.types.chat.chat_completion_audior   r  vllm.entrypoints.chat_utilsr   r   r   r   r   0vllm.entrypoints.openai.chat_completion.protocolr   r   r   r    r!   r"   /vllm.entrypoints.openai.chat_completion.servingr#   'vllm.entrypoints.openai.engine.protocolr$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   &vllm.entrypoints.openai.engine.servingr/   r0   ,vllm.entrypoints.openai.parser.harmony_utilsr1   r2   *vllm.entrypoints.openai.responses.protocolr3   vllm.entrypoints.openai.utilsr4   vllm.entrypoints.utilsr5   vllm.inputs.datar6   vllm.loggerr7   vllm.outputsr8   vllm.reasoningr9   vllm.renderersr:   vllm.renderers.inputsr;   vllm.sampling_paramsr<   vllm.tokenizersr=   r  vllm.tokenizers.mistralr>   r?   r@   rA   vllm.tool_parsersrB   %vllm.tool_parsers.mistral_tool_parserrC   vllm.utils.collection_utilsrD   .vllm_omni.entrypoints.openai.audio_utils_mixinrE   %vllm_omni.entrypoints.openai.protocolrF   +vllm_omni.entrypoints.openai.protocol.audiorG   rH   vllm_omni.lora.requestrI   vllm_omni.lora.utilsrJ   vllm_omni.outputsrK   *vllm_omni.entrypoints.async_omni_diffusionrL   rx  r   rM   rX   rX   rX   rY   <module>   sp     4