o
    
۾i                     @   sr  d dl Z d dlZd dlmZ d dlmZmZmZmZm	Z	m
Z
 d dlmZmZ d dlmZ d dlmZ d dlmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZmZ d dlmZ ee Z!de	defddZ"de	dedB fddZ#de	defddZ$e Z%e%j&deegej'j(ddi iiej)j(deiej*j(deiej+j(deiideedede	fddZ,defddZ-dS )     N)
HTTPStatus)	APIRouterDependsFastAPIHTTPExceptionRequestResponse)JSONResponseStreamingResponse)EngineClient)ErrorResponse)validate_json_request)GenerateRequestGenerateResponse)ServingTokens)OpenAIServingTokenization)load_aware_callwith_cancellation)init_loggerrequestreturnc                 C   
   | j jjS N)appstateopenai_serving_tokenizationr    r   \/home/ubuntu/.local/lib/python3.10/site-packages/vllm/entrypoints/serve/disagg/api_router.pytokenization"      
r   c                 C   r   r   )r   r   serving_tokensr   r   r   r   generate_tokens&   r    r"   c                 C   r   r   )r   r   engine_clientr   r   r   r   r#   *   r    r#   z/inference/v1/generatecontenttext/event-streammodel)dependencies	responsesraw_requestc              
      s   t |}|d u rt|jddS z|| |I d H }W n ty3 } z||W  Y d }~S d }~ww t|trCt| |j	j
dS t|trOt| dS t|ddS )Nz.The model does not support generate tokens API)message)r$   status_code)r$   r%   )r$   
media_type)r"   r   create_error_responseserve_tokens	Exception
isinstancer   r	   
model_dumperrorcoder   r
   )r   r)   handler	generatorer   r   r   generate1   s&   

r7   r   c                 C   s6   t | jjddrtddtfdd}| t d S )Ntokens_onlyFz/abort_requestsr)   c              
      s   z	|   I dH }W n t jy# } zttjjd| d|d}~ww |d}|du r5ttjjddtt	| 
| tddS )zq
            Abort one or more requests. To be used in a
            Disaggregated Everything setup.
            NzJSON decode error: )r+   detailrequest_idsz%Missing 'request_ids' in request body   )r+   )jsonJSONDecodeErrorr   r   BAD_REQUESTvaluegetasynciocreate_taskr#   abortr   )r)   bodyr6   r:   r   r   r   abort_requestsV   s(   

z%attach_router.<locals>.abort_requests)getattrr   argsrouterpostr   include_router)r   rE   r   r   r   attach_routerS   s   rK   ).rA   r<   httpr   fastapir   r   r   r   r   r   fastapi.responsesr	   r
   vllm.engine.protocolr   'vllm.entrypoints.openai.engine.protocolr   vllm.entrypoints.openai.utilsr   &vllm.entrypoints.serve.disagg.protocolr   r   %vllm.entrypoints.serve.disagg.servingr   'vllm.entrypoints.serve.tokenize.servingr   vllm.entrypoints.utilsr   r   vllm.loggerr   __name__loggerr   r"   r#   rH   rI   OKr?   r>   	NOT_FOUNDINTERNAL_SERVER_ERRORr7   rK   r   r   r   r   <module>   s>    
