o
    ۷i                 0   @   s  d dl Z d dlZd dlZd dlZd dlmZ d dlZd dlZd dlZd dl	Z	d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZmZmZ d dlZd dlmZ d dlmZmZmZmZmZmZm Z  d dl!m"Z"m#Z# d d	l$m%Z% d d
l&m'Z'm(Z( d dl)m*Z* d dl+m,Z, d dl-m.Z. d dl/m0Z0 d dl1m2Z2 d dl3m4Z4 d dl5m6Z6 d dl7m8Z8 d dl9m:Z:m;Z;m<Z< d dl=m>Z? d dl=m@ZA zd dlBmCZC W n eDy   d dlEmCZC Y nw d dlFmGZGmHZH d dlImJZJ d dlKmLZLmMZMmNZNmOZO d dlPmQZQ d dlRmSZS d dlTmUZU d dlVmWZW d dlXmYZY d dlZm[Z[m\Z\ d d l]m^Z^ d d!l_m`Z` d d"lambZb d d#lcmdZd d d$lemfZf d d%lgmhZh d d&limjZj d d'lkmlZlmmZmmnZn d d(lompZp d d)lqmrZr d d*lsmtZt d d+lumvZv d d,lwmxZx d d-lymzZzm{Z{ d d.l|m}Z} d d/l~mZmZmZ d d0lmZmZ d d1lmZ d d2lmZ d d3lmZ d d4lmZmZmZ d d5lmZ d d6lmZ epeZe Ze Zd7ed8efd9d:ZG d;d< d<e'Z	dd=ed>ed?ee dB d8dfd@dAZdBZdd>ed?ee dB fdCdDZG dEdF dFZddGdHZdddIdJZedddKd7edLedB dMeeef dB d8ee0 fdNdOZedPdQd7edLed8ee0 fdRdSZdTe0dUe*d7ed8dfdVdWZdXed8edB fdYdZZdXed8edB fd[d\ZdXed8edB fd]d^Zejd_ee^gejjd`dai iiejjdbeLiejjdbeLiejjdbeLiidceneldXeGddefdedfZeedgdhh ejdgee^gejjd`dii iiejjdbeLiejjdbeLiejjdbeLiidceneldXe}ddefdjdkZejdlejjdbeiejjdbeLiejjdbeLiejjdbeLiidmddefdndoZeedp edpdded8e"fdqdrZeeds edsdded8e"fdtduZejdvee^gejjdbeiejjdbeLiejjdbeLiejjdbeLiidcdXedded8efdwdxZejdyejjdbeiejjdbeLiejjdbeLiejjdbeLiidmededdzd{ededd|d{ed}eded~ededededdededededededededfddedee  dB dee  dB dee dB dee dB dedbedededededB dedB deeed ddf dedB dedB dedB dedB dedB dedB dedB dedB d8ef,ddZddefddZdd Zdeeef fddZdTexeB dedee fddZdededed8dfddZded8ee fddZdee d8ee%j% fddZdedB dedB d8efddZ	dde%j%deded8efddZdedB deded8dfddZeddddedXedB fddZeddddedXedB fddZdddXeddededB d8efddZdedB d8efddńZejdejjdbeiejjdbeLiejjdbeLiejjdbeLiidmed}eddǍeddǍeddǍeddǍeddǍeddǍeddǍeddǍeddǍeddǍeddǍeddǍeddǍeddǍeddǍeddǍeddǍeddǍeddǍeddǍfddedede dB dbedB dedB dedB dedB dedB dedB dedB dedB dedB dedB dedB dedB dedB dedB dedB dedB dedB dedB dedB d8ef.dd҄ZdS )    N)	Namespace)AsyncIterator)asynccontextmanager)
HTTPStatus)	AnnotatedAnycast)	APIRouterDependsFileFormHTTPExceptionRequest
UploadFile)JSONResponseStreamingResponse)Image)	BaseModelField)State)Route)SamplingParams)EngineClient)AnthropicServingMessages)load_chat_template)
serve_http)RequestLogger)DemoToolServerMCPToolServer
ToolServer)	build_app)setup_server)base)ChatCompletionRequestChatCompletionResponse)OpenAIServingCompletion)ErrorResponse	ModelCard	ModelListModelPermission)BaseModelPath)OpenAIServingModels)metrics_header)OpenAIServingResponses)get_uvicorn_log_config)OpenAIServingTranscriptionOpenAIServingTranslation)validate_json_request)ServingClassification)OpenAIServingEmbedding)OpenAIServingPooling)ServingScores)ServingTokens)OpenAIServingTokenization)load_aware_callprocess_lora_moduleswith_cancellation)init_loggerPOOLING_TASKS)ToolParserManager)decorate_logs)	AsyncOmni)encode_image_base64
parse_size)OpenAICreateSpeechRequest)	ImageDataImageGenerationRequestImageGenerationResponse)VideoGenerationRequestVideoGenerationResponse)OmniOpenAIServingChat)OmniOpenAIServingSpeech)OmniOpenAIServingVideo)OmniDiffusionSamplingParamsOmniSamplingParamsOmniTextPrompt)LoRARequest)stable_lora_int_idargsreturnc                 C   s@   t | dd }|d urt |dd }|d urdS tjd}|d uS )Nprofiler_configprofilerTVLLM_TORCH_PROFILER_DIR)getattrosenvironget)rQ   rS   rT   	env_value r[   ]/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm_omni/entrypoints/openai/api_server.py!_should_enable_profiler_endpointsp   s   r]   c                   @   s.   e Zd ZU dZedddZee dB ed< dS )ProfileRequestz&Request model for profiling endpoints.Nz;List of stage IDs to profile. If None, profiles all stages.)defaultdescriptionstages)	__name__
__module____qualname____doc__r   ra   listint__annotations__r[   r[   r[   r\   r^   ~   s   
 r^   routerpathmethodsc                 C   sv   |r	dd |D nd }t | jD ](}t|dd |krq|d ur2dd t|dd p*t D }||@ s2q| j| qd S )Nc                 S      h | ]}|  qS r[   upper.0methodr[   r[   r\   	<setcomp>       z,_remove_route_from_router.<locals>.<setcomp>rj   c                 S   rl   r[   rm   ro   r[   r[   r\   rr      rs   rk   )rf   routesrV   setremove)ri   rj   rk   methods_setrouteroute_methodsr[   r[   r\   _remove_route_from_router   s   rz   zendpoint-load-metrics-formatc                 C   sd   g }| j D ]}t|tr$|j|kr$|du st|dr$|j|@ r$|| q|D ]}| j | q'dS )zRemove a route from the app by path and optionally by methods.

    OMNI: used to override upstream /v1/chat/completions with omni behavior.
    Nrk   )rt   
isinstancer   rj   hasattrrk   appendrv   )apprj   rk   routes_to_removerx   r[   r[   r\   _remove_route_from_app   s   

r   c                   @   s4   e Zd ZdZdee ddfddZdefddZdS )	_DiffusionServingModelsaZ  Minimal OpenAIServingModels implementation for diffusion-only servers.

    vLLM's /v1/models route expects `app.state.openai_serving_models` to expose
    `show_available_models()`. In pure diffusion mode we don't initialize the
    full OpenAIServingModels (it depends on LLM-specific processors), so we
    provide a lightweight fallback.
    base_model_pathsrR   Nc                 C   s
   || _ d S N)_base_model_paths)selfr   r[   r[   r\   __init__      
z _DiffusionServingModels.__init__c                    s   t dd | jD dS )Nc                 S   s"   g | ]}t |j|jt gd qS ))idroot
permission)r'   name
model_pathr)   )rp   
base_modelr[   r[   r\   
<listcomp>   s    zA_DiffusionServingModels.show_available_models.<locals>.<listcomp>)data)r(   r   )r   r[   r[   r\   show_available_models   s   z-_DiffusionServingModels.show_available_models)	rb   rc   rd   re   rf   r*   r   r(   r   r[   r[   r[   r\   r      s    r   c                    s\   ddl }|jddtd |jddtd td t| \}}t||| fi |I dH  dS )zRun a single-worker API server.

    Unified entry point that automatically handles both LLM and Diffusion models
    through AsyncOmni, which manages multi-stage pipelines.
    r   Nignorez.*Pydantic.*serialization.*messagecategoryz(.*PydanticSerializationUnexpectedValue.*	APIServer)warningsfilterwarningsUserWarningr?   setup_openai_serveromni_run_server_worker)rQ   uvicorn_kwargswarnings_modulelisten_addresssockr[   r[   r\   omni_run_server   s   r   c                    s  |j rt|j dkrt|j  |jr't|jdkr'ddlm} ||j t|}|dur3||d< t	||d4 I dH }t
|drMt| I dH }nd}|sSd}t||}	t|	d	d
h t|	ddh |	t t||	j|I dH  t|rtd |	t | I dH }
|
du }|rtd|  n	td|
jj|  t|	f||j|j|j|j|j  t!j"|j#|j$|j%|j&|j'|j(d|I dH }W d  I dH  n1 I dH sw   Y  z|I dH  W |)  dS |)  w )zRun a single API server worker.   r   )ReasoningParserManagerN
log_config)client_configget_supported_tasks)generate/v1/chat/completionsPOST
/v1/modelsGETzOProfiler endpoints are enabled. This should ONLY be used for local development!z4Starting vLLM API server (pure diffusion mode) on %sz!Starting vLLM API server %d on %s)r   enable_ssl_refreshhostport	log_level
access_logtimeout_keep_alivessl_keyfilessl_certfilessl_ca_certsssl_cert_reqsh11_max_incomplete_event_sizeh11_max_header_count)*tool_parser_pluginlenr>   import_tool_parserreasoning_parser_pluginvllm.reasoningr   import_reasoning_parserr.   build_async_omnir|   tupler   build_openai_appr   include_routerri   omni_init_app_statestater]   loggerwarningprofiler_routerget_vllm_configinfoparallel_config_api_process_rankr   r   r   r   uvicorn_log_leveldisable_uvicorn_access_logenvsVLLM_HTTP_TIMEOUT_KEEP_ALIVEr   r   r   r   r   r   close)r   r   rQ   r   r   r   r   engine_clientsupported_tasksr~   vllm_configis_pure_diffusionshutdown_taskr[   r[   r\   r      s|   




(>r   ) disable_frontend_multiprocessingr   r   r   c             	   C  s   t ddkr!td td tdg t  td t	| |d4 I dH }|V  W d  I dH  dS 1 I dH s@w   Y  dS )aV  Build an AsyncOmni instance from command-line arguments.

    Creates an async context manager that yields an AsyncOmni instance
    configured from the provided arguments. Handles forkserver setup if
    needed and ensures proper cleanup on exit.

    Args:
        args: Parsed command-line arguments containing model and configuration
        disable_frontend_multiprocessing: Optional flag to disable frontend
            multiprocessing (deprecated in V1)
        client_config: Optional client configuration dictionary

    Yields:
        EngineClient instance (AsyncOmni) ready for use
    VLLM_WORKER_MULTIPROC_METHOD
forkserverz!Setup forkserver with pre-importszvllm.v1.engine.async_llmzForkserver setup complete!r   N)
rW   getenvr   debugmultiprocessingset_start_methodset_forkserver_preloadr   ensure_running"build_async_omni_from_stage_config)rQ   r   r   
async_omnir[   r[   r\   r   .  s   


.r   Fr   c                C  sn   |rt d d}z$t|  }|dd tdd| ji|}|V  W |r-|  dS dS |r6|  w w )a  Create AsyncOmni from stage configuration.

    Creates an AsyncOmni instance either in-process or using multiprocess
    RPC. Loads stage configurations from the model or from a specified path.

    Args:
        args: Parsed command-line arguments containing model and stage configs
        disable_frontend_multiprocessing: Flag to disable frontend multiprocessing
            (deprecated in V1)
        client_config: Optional client configuration dictionary

    Yields:
        EngineClient instance (AsyncOmni) ready for use

    Note:
        Stage configurations are loaded from args.stage_configs_path if provided,
        otherwise from the model's default configuration.
    z:V1 is enabled, but got --disable-frontend-multiprocessing.Nmodelr[   )r   r   varscopypopr@   r   shutdown)rQ   r   r   kwargsr[   r[   r\   r   V  s   

r   r   r   c                    s  |   I dH }d}t| dr.| jr.| j}t|dkr.|d dd}|dkr.d	}td
  jdur7 j}n jg} j	rEt
 jd}nd} fdd|D }	| |_ j |_ |_t| drc| jnd|_|r|rn|d n j}
d|_| |_t|	|_d|_tj| |
d|_t| dr| jnd}tj| |
|d|_t dd|_d|_td|
 dS |du r|   I dH }|du rtd ||_dh}t| drt|  I dH }td| t  j!} j"dkrt# }t$|t#sJ |% I dH  n j"rt& }|' j"I dH  nd}|dur|j(dur|j(j)ni }t* j+|}t| drD| j,du sDt| drD| j-du sDt| drD| j.du r|durzrddl/m0} ddl1m2} | 3 I dH }|durt| drn| j,du ry||d| _,td t| dr| j.du r|j.| _.td t| dr| j-du rt| dr| j.n|j.}|j4}|||| _-td ntd  W n t5y } ztd!| W Y d}~n
d}~ww td" t6| |	|d#|_|j7 I dH  d|v rt8| |j|| j9 j: j; j<| j=j> j? j@ jA jBd$nd|_Cd|v r<t| |j jDf|| j9 jE jF j: j; jG j< j=j> j? j@ jA jH jBd%nd|_|jdurM|jI I dH  d|v rbtJ| |j| j: j? j@ jBd&nd|_KtLd'd( |D rtM| |j||| j9 jF jBd)nd|_Nd*|v rtO| |j|| j9 jF jBd+nd|_Pd,|v rtQ| |j|| j9 jF jBd+nd|_Rd*|v sd-|v rtS| |j|| jBd.nd|_TtU| |j|| j9 jF jBd+|_d/|v rtV| |j| jB j@d0nd|_Wd/|v rtX| |j| jB j@d0nd|_Yd|v rtZ| |j jD|| j9 j: j; j< j=j> j? j@d1nd|_[d|v r9t\| |j| j: jB j? jA j]d2nd|_^t_| |j|d3|_`t| |rN|d nd|jd4|_ j|_d|_dS )5a  Initialize the FastAPI application state for omni API server.

    Sets up the application state with model information, request logger,
    and other server configuration needed for handling API requests.
    Automatically detects pure diffusion mode (single diffusion stage) and
    handles it appropriately.

    Args:
        engine_client: Engine client instance (AsyncOmni)
        state: FastAPI application state object to initialize
        args: Parsed command-line arguments
    NFstage_configs   r   
stage_typellm	diffusionTz5Detected pure diffusion mode (single diffusion stage))max_log_lenc                    s   g | ]	}t | jd qS ))r   r   )r*   r   )rp   r   rQ   r[   r\   r     s    z'omni_init_app_state.<locals>.<listcomp>)diffusion_engine
model_name)r   r   r   enable_server_load_trackingz3Pure diffusion API server initialized for model: %sz9vllm_config is None, some features may not work correctlyr   r   zSupported tasks: %sdemoinput_processorio_processormodel_config)get_io_processor)OmniInputProcessor)r   z)Initialized input_processor for AsyncOmniz&Initialized model_config for AsyncOmniz&Initialized io_processor for AsyncOmnizNCannot initialize processors: tokenizer is None. OpenAIServingModels may fail.zPFailed to initialize processors for AsyncOmni: %s. OpenAIServingModels may fail.zPCannot initialize processors: vllm_config is None. OpenAIServingModels may fail.)r   r   lora_modules)request_loggerchat_templatechat_template_content_formatreturn_tokens_as_token_idsenable_auto_toolstool_parsertool_serverreasoning_parserenable_prompt_tokens_detailsenable_force_include_usageenable_log_outputslog_error_stack)r   r   r   default_chat_template_kwargstrust_request_chat_templater   r   #exclude_tools_when_tool_choice_noner   r   r   r   r   enable_log_deltasr  )r   r   r   r   r  c                 s   s    | ]}|t v V  qd S r   r<   )rp   taskr[   r[   r\   	<genexpr>u  s    z&omni_init_app_state.<locals>.<genexpr>)r   r   r   r   r  r  embed)r   r   r   r  r  classifyscore)r   score_templater  transcription)r   r  r   )	r   r   r   r   r   r   r   r   r   )r   r   r  r   r   force_no_detokenize)r   )r   r   )ar   r|   r   r   rY   r   r   served_model_namer   enable_log_requestsr   r   r   disable_log_stats	log_statsrQ   r   r   r   openai_serving_modelsopenai_serving_tokenizationrI   for_diffusionopenai_serving_chatrK   openai_serving_videorV   r   server_load_metricsr   ru   r   r   r   r   r   r{   init_and_validater   add_tool_serverlora_configdefault_mm_lorasr9   r   r   r   r   vllm.plugins.io_processorsr    vllm_omni.engine.input_processorr   get_tokenizerio_processor_plugin	Exceptionr+   init_static_lorasr-   r   r   enable_auto_tool_choicetool_call_parserstructured_outputs_configr   r   r   r   r  openai_serving_responsesresponse_roler  r  r  r  warmupr%   openai_serving_completionanyr4   openai_serving_poolingr3   openai_serving_embeddingr2   openai_serving_classificationr5   openai_serving_scoresr7   r/   openai_serving_transcriptionr0   openai_serving_translationr   anthropic_serving_messagesr6   tokens_onlyserving_tokensrJ   openai_serving_speech)r   r   rQ   r   r   r   r   served_model_namesr   r   r   diffusion_stage_configsr   resolved_chat_templater   r  r   r   r   	tokenizerr   r  er[   r   r\   r     s:  

















)










r   requestc                 C   
   | j jjS r   )r~   r   r  r9  r[   r[   r\   	Omnivideo  r   r<  c                 C   r:  r   )r~   r   r  r;  r[   r[   r\   Omnichat  r   r=  c                 C   r:  r   )r~   r   r3  r;  r[   r[   r\   
Omnispeech  r   r>  r   contenttext/event-streamr   )dependencies	responsesraw_requestc                    s@  |j td}t|}|d u r*t|jjdd }|d u r$ttj	j
dd|jddS z|| |I d H }W n tyR } ztd| ttjj
t|d|d }~ww t|trit| |jre|jjdS ddS t|trd	d l}d	d l}|  |jd
td |jd
dtd z|jdddd}	t|	t|dW W  d    S  ty	   z|jddd}
| |
}	t|	t|dW  Y W  d    S  ty   | ' |jd
td t|jdddt|dW  d     Y  Y W  d    S 1 sw   Y  Y nw Y nw W d    n	1 sw   Y  t!|ddS )N r  z/The model does not support Chat Completions APIstatus_codedetailr   zChat completion failed: %s  r?  rF  r   r   )r   z.*Pydantic.*r   jsonTnone)modeserialize_as_anyr   )r?  headers)r   rN  )rM  r   r@  )r?  
media_type)"rO  rY   )ENDPOINT_LOAD_METRICS_FORMAT_HEADER_LABELr=  rV   r~   r   r   r   	NOT_FOUNDvaluecreate_error_responsecreate_chat_completionr   r   	exceptionINTERNAL_SERVER_ERRORstrr{   r&   r   
model_dumperrorcoder$   rK  r   catch_warningsr   r   r,   model_dump_jsonloadsr   )r9  rC  metrics_header_formathandlerbase_server	generatorr8  json_libr   response_dictresponse_jsonr[   r[   r\   rU    s~   





rU  z/v1/audio/speechr   zaudio/*c              
      s   t |}|d u r#t|jjdd }|d u rttjjdd|jddS z
|	| |I d H W S  t
yD } zttjjt|d|d }~ww )Nr  %The model does not support Speech APIrE  rH  )r>  rV   r~   r   r   r   rR  rS  rT  create_speechr   rW  rX  )r9  rC  r`  ra  r8  r[   r[   r\   rg  C  s    rg  z/v1/audio/voices)rB  c                    sD   t | }|du rt| jddS |jrt|jng }td|idS )z9List available TTS voices/speakers from the loaded model.Nrf  rH  voicesr?  )r>  r"   rT  supported_speakerssortedr   )rC  r`  speakersr[   r[   r\   list_voices_  s   rm  z/healthc                    s   t | jjdd}|dur't|dr|jrtddidS tddd	tjjd
S t | jjdd}|durA|	 I dH  tddidS tddd	tjjd
S )zHealth check endpoint that works for both LLM and diffusion modes.

    Returns 200 OK if the server is healthy.
    For LLM mode: delegates to engine_client health check
    For diffusion mode: checks if diffusion_engine is running
    r   N
is_runningstatushealthyri  	unhealthyzDiffusion engine is not running)ro  reasonrJ  r   zNo engine initialized)
rV   r~   r   r|   rn  r   r   SERVICE_UNAVAILABLErS  check_health)rC  r   r   r[   r[   r\   healthz  s"   	ru  r   c                    s|   t | jjdd}|durtd|dddg dgdd	S t | jjd
d}|dur6| I dH }t| d	S tdg dd	S )zShow available models endpoint that works for both LLM and diffusion modes.

    Returns model information in OpenAI-compatible format.
    diffusion_model_nameNrf   r   r   z	vllm-omni)r   objectcreatedowned_byr   )rw  r   ri  r  )rV   r~   r   r   r   rY  )rC  rv  r  modelsr[   r[   r\   r     s*   r   z/v1/images/generationsc              
      s\  t |\}}}| jdur| j|krtd| j d| d zd| ji}| jdur/| j|d< t| jd}t| j	\}}t
|d| t
|d	| d
\}	}
| jr^t| j\}	}
|	 d|
 }nd}t
|d|	 t
|d|
 t
|d| j t
|d| j t
|d| j t
|d| jdur| jntdd t
|d| j dt j }td| j d|  t|||||dI dH }|du rttjjddt|}tdt| d dd |D }tt t!! |d W S  ty     t"y } zt#d!|  ttj$jt%|dd}~w t&y- } zt'd"|  ttjjd"t%| dd}~ww )#a  Generate images from text prompts using diffusion models.

    OpenAI DALL-E compatible endpoint for text-to-image generation.
    Only supports multi-stage omni mode with diffusion stages.

    Args:
        request: Image generation request with prompt and parameters
        raw_request: Raw FastAPI request for accessing app state

    Returns:
        ImageGenerationResponse with generated images as base64 PNG

    Raises:
        HTTPException: For validation errors, missing engine, or generation failures
    N#Model mismatch: request specifies '' but server is running ''. Using server model.promptnegative_prompt)num_outputs_per_promptlora_request
lora_scaleNNxzmodel defaultwidthheightnum_inference_stepsguidance_scaletrue_cfg_scaleseedr       generator_deviceimg_gen_Generating 
 image(s) r   
gen_paramsstage_typesr~  
request_id.No output generated from multi-stage pipeline.rE  Successfully generated 	 image(s)c                 S   s   g | ]
}t t|d dqS )Nb64_jsonrevised_prompt)rD   rA   rp   imgr[   r[   r\   r   %      z#generate_images.<locals>.<listcomp>)rx  r   Validation error: zImage generation failed: )(_get_engine_and_modelr   r   r   r~  r  rL   n_parse_lora_requestlora_update_if_not_nonesizerB   r  r  r  r  randomrandintr  uuiduuid4hexr   _generate_with_async_omnir   r   rW  rS  _extract_images_from_resultr   rF   rg   time
ValueErrorrZ  BAD_REQUESTrX  r   rV  )r9  rC  r   r   r  r~  r  r  r  r  r  size_strr  resultimages
image_datar8  r[   r[   r\   generate_images  s   




r  z/v1/images/editszimage[])aliaszurl[].r   autor  pngd   imageimage_arrayurl	url_arrayr~  r  r  response_formatoutput_format
backgroundoutput_compression)geleuserr  r  r  r  r  r  r  c           +   
      s`  t | \}}}|dur||krtd| d| d t||	dkr-ttjjddz9d|i}|dur;||d	< g }|p@|}|pD|}|rL|| |rS|| |s[td
ddt	|I dH }i |d< ||d d< t
 }t| jjdd}t|dd}dd t|D d }t||t| t|d| t|} t| \}!}"t|d|! t|d|" t|dd}#d\}$}%| dkr|d j\}$}%nt|\}$}%|#dur|$|% |#krttjjd|$ d|% d|# dd|$ d|% }&t|d|$ t|d|% t|d| t|d | t|d!| t|d"|dur|ntdd# t|d$| d%tt  }'td&| d'|&  t|||||'d(I dH }(t|(}td)t| d*  fd+d|D })ttt |)|&d,W S  typ     t y }* zt!d-|*  ttjjt|*dd}*~*w t"y }* zt#d.|*  ttj$jd.t|* dd}*~*ww )/z0
    OpenAI-compatible image edit endpoint.
    Nr{  r|  r}  r  z1Only response_format 'b64_json' is supported now.rE  r~  r  i  z"Field 'image' or 'url' is requiredmulti_modal_datar  rQ   default_sampling_paramsc                 S   s   g | ]
\}}|d kr|qS r   r[   )rp   itr[   r[   r\   r     r  zedit_images.<locals>.<listcomp>r   r  r  r  max_generated_image_sizer  r  zRequested image size r  z% exceeds the maximum allowed size of z pixels.r  r  r  r  r  r  r  r  	img_edit_r  r  r  r  r  c                    s"   g | ]}t t| d ddqS ))formatr  Nr  )rD   %_encode_image_base64_with_compressionr  r  r  r[   r\   r     s    )rx  r   r  r  r  zImage edit failed: )%r  r   r   _choose_output_formatr   r   r  rS  extend_load_input_imagesrL   rV   r~   r   	enumerate#apply_stage_default_sampling_paramsrX  r  _get_lora_from_json_strr  lowerr  rB   r  r  rg   r  r   r  r  r   rF   r  rZ  r   rV  rW  )+rC  r  r  r  r  r~  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r   r  input_images_listr  urls
pil_imagesr  app_state_argsdefault_sample_paramdiffusion_stage_id	lora_dictr  r  r  r  r  r  r  r  r  r8  r[   r  r\   edit_images9  s   &


"	

r  c           	   
   C   sL  t | jjdd }|d u st|dsttjjddt | jjdd }|s+ttjjddd}g }|D ]J}d }t|t	rA|
dd	}n/t|d
rM|
dd	}n#t|drV|j}nzd|v r_|d nd	}W n ttfyo   d	}Y nw |dkrvd}|| q1|sttjjddt | jjdd }|rt|dr|jr|jd j}nd}|||fS )Nr   
stage_listzOMulti-stage engine not initialized. Start server with a multi-stage omni model.rE  r   zDStage configs not found. Start server with a multi-stage omni model.Fr   r   rY   r   Tz1No diffusion stage found in multi-stage pipeline.r  r   r   unknown)rV   r~   r   r|   r   r   rs  rS  r{   dictrY   r   	TypeErrorKeyErrorr}   r   r   )	rC  r   r   has_diffusion_stager  stager   serving_modelsr   r[   r[   r\   r    sP   



r  c                 C   sR   | d u rd S zt | }W n t jy   tdddw t|ts'tddd|S )NrI  zInvalid LoRA JSON stringrE  zLoRA must be a JSON object)rK  r^  JSONDecodeErrorr   r{   r  )	lora_bodyr  r[   r[   r\   r    s   
r  r  c                 C   s   | d urvt | tsttjjdd| dp| dp| d}| dp3| dp3| dp3| d	}| d
}|d u rB| d}| d}|d u rP| d}|d u r\|r\tt|}|r`|shttjjddt	t|t
|t||fS dS )Nz'Invalid lora field: expected an object.rE  r   	lora_nameadapter
local_pathrj   	lora_pathlora_local_pathscaler  int_idlora_int_idz5Invalid lora object: both name and path are required.r  )r{   r  r   r   r  rS  rY   rP   rX  rO   rg   )r  r  r  r  r  r[   r[   r\   r    s:   





r  r  r  c                    s   t t| } d }t| dd }t|trtt| dd }t|ts&dd |D }nt|}t|t|krA|dd |D  d t| }g }t|D ]\}}	|	dkrU|| qG|| }
||
 qG| jd
d|i|2 z3 d H W }|}qi6 n| jd
d|gi|I d H }|d u rt	t
jjdd	|S )Nr  default_sampling_params_listc                 S       g | ]}|d krt  nt qS r  rL   r   rp   str[   r[   r\   r   M  s    z-_generate_with_async_omni.<locals>.<listcomp>c                 S   r  r  r  r  r[   r[   r\   r   U  s     r   sampling_params_listr  rE  r[   )r   r@   rV   r{   rf   r   r  r}   r   r   r   rW  rS  )r   r  r  r   r  r  default_params_listr  idxr   base_paramsoutputr[   r[   r\   r  ?  sV   



r  rw  keyvalc                 C   s   |d urt | || d S d S r   )setattr)rw  r  r  r[   r[   r\   r  s  s   r  r  c                 C   sh   g }t | dr| jr| j}|S t | dr2| j}t|tr'|dr'|d }|S t |dr2|jr2|j}|S )Nr  request_output)r|   r  r  r{   r  rY   )r  r  r  r[   r[   r\   r  x  s   
r  inputsc                    s  t | tr	| g} g }| D ]}t |trI|drIz|dd\}}t|}tt	|}|
| W q tyH } ztd| d}~ww t |tr|drtjdd4 I dH >}z||I dH }	|	  tt	|	j}|
| W n ty } z
td	| d
| d}~ww W d  I dH  n1 I dH sw   Y  qt|drz| I dH }
tt	|
}|
| W q ty } ztd| d}~ww td| |std|S )z)
    convert to PIL.Image.Image list
    z
data:image,r   zInvalid base64 image: Nhttp<   )timeoutz"Failed to download image from URL z: filezFailed to open uploaded file: zUnsupported input: zNo valid input images found)r{   rX  
startswithsplitbase64	b64decoder   openioBytesIOr}   r   r  httpxAsyncClientrY   raise_for_statusr?  r|   read)r  r  inp_b64_dataimage_bytesr  r8  clientrespimg_datar[   r[   r\   r    sT   

(

r  c                 C   s0   | pd  }|dv r|S |pd  dkrdS dS )NrD  >   jpgr  jpegwebpr  transparentr  r  )r  )r  r  fmtr[   r[   r\   r    s   r  r  c                 C   sv   t  }i }|dv r||d< n|dkr!tdtdd|d  |d< | j|fd|i| |d t| 	d	S )
a  Encode PIL Image to base64 PNG string.

    Args:
        image: PIL Image object
        format: Output image format (e.g., "PNG", "JPEG", "WEBP")
        output_compression: Compression level (0-100%), 100 for best quality
    Returns:
        Base64-encoded image as string
    )r  r  r  qualityr  r   	      compress_levelr  zutf-8)
r  r  maxminsaveseekr  	b64encoder  decode)r  r  r  buffersave_kwargsr[   r[   r\   r    s   

r  default_params_jsonsampling_params	stage_keyc                 C   sV   | dur%t | }||v r'|| }| D ]\}}t||r$t||| qdS dS dS )a  
    Update a stage's sampling parameters with vLLM-Omni defaults.

    Args:
        default_params_json: JSON string of stage-keyed default parameters
        sampling_params: The sampling parameters object to update
        stage_key: The stage ID/key in the pipeline
    N)rK  r^  itemsr|   r  )r$  r%  r&  default_params_dictstage_defaults
param_nameparam_valuer[   r[   r\   r    s   

r  z/start_profilec              
         z*|r|j nd}td|r|nd | jjj}|j|dI dH }td t|dW S  tyJ } zt	d| t
tjjdt| d	d}~ww )
a$  Start profiling for the engine.

    Args:
        request: Optional request body with stages to profile.
            - stages: List of stage IDs to profile. If None, profiles all stages.

    Example:
        POST /start_profile
        {"stages": [0, 1]}  # Profile only stages 0 and 1
    Nz Starting profiler for stages: %sallra   zProfiler started.ri  zFailed to start profiler: %szFailed to start profiler: rE  )ra   r   r   r~   r   r   start_profiler   r   rV  r   r   rW  rS  rX  rC  r9  ra   r   r  r8  r[   r[   r\   r/       

r/  z/stop_profilec              
      r,  )
a4  Stop profiling for the engine.

    Args:
        request: Optional request body with stages to stop profiling.
            - stages: List of stage IDs to stop profiling. If None, stops all stages.

    Example:
        POST /stop_profile
        {"stages": [0, 1]}  # Stop profiling only stages 0 and 1
    Nz Stopping profiler for stages: %sr-  r.  zProfiler stopped.ri  zFailed to stop profiler: %szFailed to stop profiler: rE  )ra   r   r   r~   r   r   stop_profiler   r   rV  r   r   rW  rS  rX  r0  r[   r[   r\   r2    r1  r2  input_reference_bytesr4  c             
      s   t |}|d u rttjjddtdt|j z|j	| ||dI d H W S  ty.     t
yL } ztd| ttjjdt| dd }~ww )Nz)Video generation handler not initialized.rE  zVideo generation handler: %sr3  zVideo generation failed: %szVideo generation failed: )r<  r   r   rs  rS  r   r   typerb   generate_videosr   rV  rW  rX  )r9  rC  r4  r`  r8  r[   r[   r\   _run_video_generation%  s(   r7  rS  c              
   C   sN   | d u s| dkr
d S zt | W S  t jy& } z	ttjjdd|d }~ww )NrD  zInvalid JSON in form field.rE  )rK  r^  r  r   r   r  rS  )rS  excr[   r[   r\   _parse_form_json>  s   r9  z
/v1/videos)r_   input_referencesecondsr  r  
num_framesfpsguidance_scale_2boundary_ratio
flow_shiftc                    s   |dur|  I dH nd}i d|d|d|d|d|d|ddd	|d
|	d|
d|d|d|d|d|d|d||||t|d}dd | D }tdi |}t|| |dI dH S )z9OpenAI-style video create endpoint (multipart form-data).Nr~  r   r;  r  r  r  r:  r  r  r  r<  r=  r  r  r>  r?  r@  )r  r  r  r  c                 S   s   i | ]\}}|d ur||qS r   r[   )rp   kvr[   r[   r\   
<dictcomp>  s    z create_video.<locals>.<dictcomp>r3  r[   )r  r9  r'  rG   r7  )rC  r~  r:  r   r  r;  r  r  r  r  r  r<  r=  r  r  r>  r?  r@  r  r  r  r  r4  request_datar9  r[   r[   r\   create_videoJ  sZ   "	
rE  r   )rR   N)r  r  )r  r  rK  r   multiprocessing.forkserverr   rW   r  r  r  argparser   collections.abcr   
contextlibr   r  r   typingr   r   r   r  	vllm.envsr   fastapir	   r
   r   r   r   r   r   fastapi.responsesr   r   PILr   pydanticr   r   starlette.datastructuresr   starlette.routingr   vllmr   vllm.engine.protocolr   "vllm.entrypoints.anthropic.servingr   vllm.entrypoints.chat_utilsr   vllm.entrypoints.launcherr   vllm.entrypoints.loggerr    vllm.entrypoints.mcp.tool_serverr   r   r   "vllm.entrypoints.openai.api_serverr    r   r!   r   +vllm.entrypoints.serve.instrumentator.basicr"   ModuleNotFoundError(vllm.entrypoints.openai.basic.api_router0vllm.entrypoints.openai.chat_completion.protocolr#   r$   *vllm.entrypoints.openai.completion.servingr%   'vllm.entrypoints.openai.engine.protocolr&   r'   r(   r)   'vllm.entrypoints.openai.models.protocolr*   &vllm.entrypoints.openai.models.servingr+   $vllm.entrypoints.openai.orca_metricsr,   )vllm.entrypoints.openai.responses.servingr-   $vllm.entrypoints.openai.server_utilsr.   .vllm.entrypoints.openai.speech_to_text.servingr/   r0   vllm.entrypoints.openai.utilsr1   )vllm.entrypoints.pooling.classify.servingr2   &vllm.entrypoints.pooling.embed.servingr3   (vllm.entrypoints.pooling.pooling.servingr4   &vllm.entrypoints.pooling.score.servingr5   %vllm.entrypoints.serve.disagg.servingr6   'vllm.entrypoints.serve.tokenize.servingr7   vllm.entrypoints.utilsr8   r9   r:   vllm.loggerr;   
vllm.tasksr=   vllm.tool_parsersr>   vllm.utils.system_utilsr?    vllm_omni.entrypoints.async_omnir@   ,vllm_omni.entrypoints.openai.image_api_utilsrA   rB   +vllm_omni.entrypoints.openai.protocol.audiorC   ,vllm_omni.entrypoints.openai.protocol.imagesrD   rE   rF   ,vllm_omni.entrypoints.openai.protocol.videosrG   rH   )vllm_omni.entrypoints.openai.serving_chatrI   +vllm_omni.entrypoints.openai.serving_speechrJ   *vllm_omni.entrypoints.openai.serving_videorK   vllm_omni.inputs.datarL   rM   rN   vllm_omni.lora.requestrO   vllm_omni.lora.utilsrP   rb   r   ri   r   boolr]   r^   rX  ru   rz   rQ  r   r   r   r   r  r   r   r   r<  r=  r>  postOKrS  r  rR  rW  rU  rg  rY   rm  ru  r   rs  r  rf   rg   floatr  r  r  r  r  r)  r  r  r  r  r  r  r/  r2  bytesr7  r9  rE  r[   r[   r[   r\   <module>   s  $


S'.
  h
=
	

 &
j





	
 8!
4
2


	
