o
    پi                     @   s  d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlZd dl	m
Z
mZmZ d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZ d dlmZ d dlm Z m!Z! d dl"m#Z# ertd dl$m%Z% e#e&Z'dZ(ej)*ddZ+edefddZ,e
 Z-e-*ddd Z.e-j*ddddefddZ/e-*dd d! Z0d"d# Z1d$e2fd%d&Z3d'd(d)efd*d+Z4e
 Z5e56e+d,efd-d.Z7d/e fd0d1Z8dS )2    N)asynccontextmanager)TYPE_CHECKING)	APIRouterFastAPIRequest)ORJSONResponse)SamplingParams)	image_api	video_api)VertexGenerateReqInput)build_sampling_params)weights_api)prepare_requestsave_outputs)async_scheduler_client)
ServerArgsget_global_server_args)init_logger)Reqi   AIP_PREDICT_ROUTEz/vertex_generateappc                 C  sV   ddl m}m} | jj}|| t||}d V  t	d |
  |  d S )Nr   )r   run_zeromq_brokerzFastAPI app is shutting down...).sglang.multimodal_gen.runtime.scheduler_clientr   r   stateserver_args
initializeasynciocreate_taskloggerinfocancelclose)r   r   r   r   broker_task r#   i/home/ubuntu/.local/lib/python3.10/site-packages/sglang/multimodal_gen/runtime/entrypoints/http_server.pylifespan&   s   

r%   z/healthc                      
   ddiS Nstatusokr#   r#   r#   r#   r$   health@   s   r*   z/modelsT)
deprecatedrequestc                    sd   ddl m} | jjj}||j}|j|j|jjj	|jj
|jjd}|r0|jj|d< |jj|d< |S )z
    Get information about the model served by this server.

    .. deprecated::
        Use /v1/models instead for OpenAI-compatible model discovery.
        This endpoint will be removed in a future version.
    r   )get_model_info)
model_pathnum_gpus	task_typedit_precisionvae_precisionpipeline_namepipeline_class)sglang.multimodal_gen.registryr-   r   r   r   r.   r/   pipeline_configr0   namer1   r2   pipeline_clsr3   __name__)r,   r-   r   
model_inforesponser#   r#   r$   
get_modelsE   s   	

r<   z/health_generatec                      r&   r'   r#   r#   r#   r#   r$   health_generateb   s   r=   c                 C   sH   t | tjrdS t | trdd |  D S t | tr"dd | D S | S )z<Recursively converts Tensors to None for JSON serialization.Nc                 S   s   i | ]	\}}|t |qS r#   make_serializable).0kvr#   r#   r$   
<dictcomp>m   s    z%make_serializable.<locals>.<dictcomp>c                 S   s   g | ]}t |qS r#   r>   )r@   rB   r#   r#   r$   
<listcomp>o   s    z%make_serializable.<locals>.<listcomp>)
isinstancetorchTensordictitemslist)objr#   r#   r$   r?   h   s   

r?   	file_pathc                 C   sR   t j| sd S t| d}t| dW  d    S 1 s"w   Y  d S )Nrbzutf-8)ospathexistsopenbase64	b64encodereaddecode)rL   fr#   r#   r$   encode_video_to_base64s   s
   $rW   req_objr   spc              
      s(  zrt | I dH }|jdu r|jdu rtd|jr!|jd  n|  t|jd g|j|jd fdd|j	|j
d t|drF| }nt|trM|nt|} rotd	  t }|ro||d
< |dd |dd t|W S  ty } ztjd|dd dt|iW  Y d}~S d}~ww )z7Forwards request to scheduler and processes the result.Nz$Model generation returned no output.r   Tc                    s    S )Nr#   )_idxoutput_file_pathr#   r$   <lambda>   s    z&forward_to_scheduler.<locals>.<lambda>)audioaudio_sample_rate
model_dumpzProcessing output file: %soutput
video_datavideo_tensorzError during generation: %s)exc_infoerror)r   forwardra   output_file_pathsRuntimeErrorr\   r   	data_typefpsr^   r_   hasattrr`   rE   rH   varsr   r   rW   popr?   	Exceptionre   str)rX   rY   r;   data	b64_videoer#   r[   r$   forward_to_schedulerz   sB   





rs   
vertex_reqc           	         s   | j s
tdg iS t }| jpi }g }| j D ]K}dt  }t||dp+|d|dp4|d|dt|d|d	|d
|d|d|dd
}t	||d}|
t|| qtj| I d H }td|iS )Npredictionsvertex_prompttextimage	image_urlseed
num_framesrj   widthheightguidance_scalesave_output)	rw   
image_pathr{   r|   rj   r}   r~   r   r   )sampling_params)	instancesr   r   
parametersuuiduuid4r   getDEFAULT_SEEDr   appendrs   r   gather)	rt   r   paramsfuturesinstridrY   backend_reqresultsr#   r#   r$   vertex_generate   s0   


r   r   c                 C   sf   t td}|t |t ddlm} ||j |tj |t	j |t
j | |j_|S )z@
    Create and configure the FastAPI application instance.
    )r%   r   )
common_api)r   r%   include_routerhealth_routervertex_router0sglang.multimodal_gen.runtime.entrypoints.openair   routerr	   r
   r   r   r   )r   r   r   r#   r#   r$   
create_app   s   


r   )9r   rR   rN   r   
contextlibr   typingr   rF   fastapir   r   r   fastapi.responsesr   4sglang.multimodal_gen.configs.sample.sampling_paramsr   r   r	   r
   9sglang.multimodal_gen.runtime.entrypoints.openai.protocolr   6sglang.multimodal_gen.runtime.entrypoints.openai.utilsr   7sglang.multimodal_gen.runtime.entrypoints.post_trainingr   /sglang.multimodal_gen.runtime.entrypoints.utilsr   r   r   r   )sglang.multimodal_gen.runtime.server_argsr   r   1sglang.multimodal_gen.runtime.utils.logging_utilsr   ;sglang.multimodal_gen.runtime.pipelines_core.schedule_batchr   r9   r   r   environr   VERTEX_ROUTEr%   r   r*   r<   r=   r?   ro   rW   rs   r   postr   r   r#   r#   r#   r$   <module>   sV   


-!