o
    پi                     @   s   d dl Z d dlZd dlmZmZmZ d dlZd dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZmZ dede jfd	d
ZG dd de	ZdS )    N)ListOptionalTuple)
EngineBase)launch_server)
ServerArgs)MultiprocessingSerializerkill_process_treeserver_argsreturnc              	   C   s   t jt| fd}|  |  }d}t }t T}t | |k rgz$dd| j	 d}|j
| d|d}|jdkrF|W W  d    S W n
 tjyQ   Y nw | sZtd	td
 t | |k s#W d    n1 sqw   Y  |  td)N)targetargsg     r@zapplication/json; charset=utf-8zBearer )zContent-TypeAuthorizationz/health_generate)headers   z'Server process terminated unexpectedly.   z1Server failed to start within the timeout period.)multiprocessingProcessr   starturltimeperf_counterrequestsSessionapi_keygetstatus_codeRequestExceptionis_alive	Exceptionsleep	terminateTimeoutError)r
   pbase_urltimeout
start_timesessionr   response r)   ]/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/entrypoints/http_server_engine.pylaunch_server_process   s6   



r+   c                   @   s   e Zd ZdZdd Zddedee fddZ			dd
e	e
eejf  dee defddZdd Z												dddZdd Zdd Zdd ZdS )HttpServerEngineAdapterz
    You can use this class to launch a server from a VerlEngine instance.
    We recommend using this class only you need to use http server.
    Otherwise, you can use Engine directly.
    c                 K   s<   t di || _td| jj d| jj  t| j| _d S )Nz#Launch HttpServerEngineAdapter at: :r)   )r   r
   printhostportr+   process)selfkwargsr)   r)   r*   __init__4   s
   z HttpServerEngineAdapter.__init__Nendpointpayloadc                 C   s@   d| j j d| j j d| }tj||pi d}|  | S )a  Make a POST request to the specified endpoint with the given payload.
        Args:
            endpoint: The API endpoint to call
            payload: The JSON payload to send (default: empty dict)
        Returns:
            The JSON response from the server
        zhttp://r-   /)json)r
   r/   r0   r   postraise_for_statusr8   )r2   r5   r6   r   r(   r)   r)   r*   _make_request;   s   z%HttpServerEngineAdapter._make_requestFnamed_tensorsload_formatflush_cachec                    s*   |  d fddt| jjD ||dS )aZ  
        Update model weights from tensor data. The HTTP server will only post meta data, and the real weights will be copied directly from GPUs.
        Note: The model should be on GPUs rather than CPU for this functionality to work properly.
        If you encounter issues, ensure your model is loaded on GPU devices rather than CPU.
        update_weights_from_tensorc                    s   g | ]	}t j d dqS )T)
output_str)r   	serialize).0_r<   r)   r*   
<listcomp>W   s    zFHttpServerEngineAdapter.update_weights_from_tensor.<locals>.<listcomp>)serialized_named_tensorsr=   r>   )r;   ranger
   tp_size)r2   r<   r=   r>   r)   rD   r*   r?   H   s   

z2HttpServerEngineAdapter.update_weights_from_tensorc                 C   s   t | jj d S N)r	   r1   pidr2   r)   r)   r*   shutdown`   s   z HttpServerEngineAdapter.shutdownc                 C   s:   |||||||||	|
|d}dd |  D }| d|S )N)textsampling_params	input_ids
image_datareturn_logproblogprob_start_lentop_logprobs_numtoken_ids_logprob	lora_pathcustom_logit_processorpriorityc                 S   s   i | ]\}}|d ur||qS rI   r)   )rB   kvr)   r)   r*   
<dictcomp>   s    z4HttpServerEngineAdapter.generate.<locals>.<dictcomp>generate)itemsr;   )r2   promptrN   rO   rP   rQ   rR   rS   rT   rU   rV   rW   r6   r)   r)   r*   r[   c   s   z HttpServerEngineAdapter.generatec                 C   
   |  dS )Nrelease_memory_occupationr;   rK   r)   r)   r*   r_         
z1HttpServerEngineAdapter.release_memory_occupationc                 C   r^   )Nresume_memory_occupationr`   rK   r)   r)   r*   rb      ra   z0HttpServerEngineAdapter.resume_memory_occupationc                 C   r^   )Nr>   r`   rK   r)   r)   r*   r>      ra   z#HttpServerEngineAdapter.flush_cacherI   )NF)NNNNFNNNNNN)__name__
__module____qualname____doc__r4   strr   dictr;   r   r   torchTensorboolr?   rL   r[   r_   rb   r>   r)   r)   r)   r*   r,   -   s:    

 r,   )r   r   typingr   r   r   r   ri   !sglang.srt.entrypoints.EngineBaser   "sglang.srt.entrypoints.http_serverr   sglang.srt.server_argsr   sglang.srt.utilsr   r	   r   r+   r,   r)   r)   r)   r*   <module>   s    