o
    
۾i                     @   s  d dl Z d dlmZ d dlmZmZmZmZmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZmZ d d	lmZ eeZd
edefddZe Zejdeegejjdeiejjdeiejjdeiej jdeiiddefddZ!eddefddZ"defddZ#dS )    N)
HTTPStatus)	APIRouterDependsFastAPIHTTPExceptionRequest)JSONResponse)EngineClient)ErrorResponse)validate_json_request)get_scaling_elastic_epset_scaling_elastic_ep)init_loggerrequestreturnc                 C   s
   | j jjS N)appstateengine_client)r    r   `/home/ubuntu/.local/lib/python3.10/site-packages/vllm/entrypoints/serve/elastic_ep/api_router.pyr      s   
r   z/scale_elastic_epmodel)dependencies	responsesraw_requestc              
      sR  z	|   I d H }W n t jy } ztddd|d }~ww |d}|dd}|d u r4tdddt|tr=|dkrCtdd	dt|trL|dkrRtdd
dtd t| }zIz|||I d H  t	dd| diW W td S  t
y } ztdd| dd|d }~w ty } ztd| tddd|d }~ww td w )Ni  zInvalid JSON format)status_codedetailnew_data_parallel_sizedrain_timeoutx   z"new_data_parallel_size is requiredr   z1new_data_parallel_size must be a positive integerz(drain_timeout must be a positive integerTmessagez
Scaled to z data parallel enginesFi  z0Scale failed due to request drain timeout after z secondszScale failed: %si  zScale failed)jsonJSONDecodeErrorr   get
isinstanceintr   r   scale_elastic_epr   TimeoutError	Exceptionloggererror)r   bodyer   r   clientr   r   r   r&       s^   


r&   z/is_scaling_elastic_epc                    s   t dt iS )Nis_scaling_elastic_ep)r   r   )r   r   r   r   r.   Z   s   r.   r   c                 C   s   |  t d S r   )include_routerrouter)r   r   r   r   attach_router_   s   r1   )$r!   httpr   fastapir   r   r   r   r   fastapi.responsesr   vllm.engine.protocolr	   'vllm.entrypoints.openai.engine.protocolr
   vllm.entrypoints.openai.utilsr   ,vllm.entrypoints.serve.elastic_ep.middlewarer   r   vllm.loggerr   __name__r)   r   r0   postOKvaluedictBAD_REQUESTREQUEST_TIMEOUTINTERNAL_SERVER_ERRORr&   r.   r1   r   r   r   r   <module>   s2   
0