o
    
۾i5                     @   s  U d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlZd dlmZmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZmZ d dlmZmZmZm Z m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z'm(Z( d dl)m*Z* d dl+m,Z, d dl-m.Z. d dl/m0Z0 e.dZ1G dd dZ2G dd dZ3de4dB de5dB fddZ6dede5dB fddZ7de5de4fd d!Z8G d"d# d#Z9d$e:ddfd%d&Z;d$e:ddfd'd(Z<d)efd*d+Z=d,ed-efd.d/Z>d,ed-efd0d1Z?e@ ZAe@e jB eCd2< e
d3efd4d5ZDdS )6    N)	Namespace)	Awaitable)asynccontextmanager)
HTTPStatus)FastAPIHTTPExceptionRequest)RequestValidationError)JSONResponseiterate_in_threadpool)URLHeadersMutableHeaders)ASGIAppMessageReceiveScopeSend)envs)EngineClient)	ErrorInfoErrorResponse)sanitize_message)VLLMValidationError)init_logger)freeze_gc_heapz$vllm.entrypoints.openai.server_utilsc                	   @   sZ   e Zd ZdZdedee ddfddZdede	fd	d
Z
dedededed fddZdS )AuthenticationMiddlewareaP  
    Pure ASGI middleware that authenticates each request by checking
    if the Authorization Bearer token exists and equals anyof "{api_key}".

    Notes
    -----
    There are two cases in which authentication is skipped:
        1. The HTTP method is OPTIONS.
        2. The request path doesn't start with /v1 (e.g. /health).
    apptokensreturnNc                 C   s   || _ dd |D | _d S )Nc                 S   s    g | ]}t |d  qS )utf-8)hashlibsha256encodedigest).0t r(   X/home/ubuntu/.local/lib/python3.10/site-packages/vllm/entrypoints/openai/server_utils.py
<listcomp>.   s     z5AuthenticationMiddleware.__init__.<locals>.<listcomp>)r   
api_tokens)selfr   r   r(   r(   r)   __init__,   s   z!AuthenticationMiddleware.__init__headersc           	      C   sj   | d}|s	dS |d\}}}| dkrdS t|d }d}| jD ]
}|t	||O }q(|S )NAuthorizationF bearerr!   )
get	partitionlowerr"   r#   r$   r%   r+   secretscompare_digest)	r,   r.   authorization_header_valuescheme_param
param_hashtoken_match
token_hashr(   r(   r)   verify_token0   s   

z%AuthenticationMiddleware.verify_tokenscopereceivesendc                 C   s   |d dvs|d dkr|  |||S |dd}t|dj|}t|d}|dr?| |s?td	d
idd}||||S |  |||S )Ntypehttp	websocketmethodOPTIONS	root_path r?   z/v1errorUnauthorizedi  )contentstatus_code)	r   r2   r   pathremoveprefixr   
startswithr>   r
   )r,   r?   r@   rA   rH   url_pathr.   responser(   r(   r)   __call__A   s   
z!AuthenticationMiddleware.__call__)__name__
__module____qualname____doc__r   liststrr-   r   boolr>   r   r   r   r   rT   r(   r(   r(   r)   r       s
    "r   c                	   @   s@   e Zd ZdZdeddfddZdeded	ede	d fd
dZ
dS )XRequestIdMiddlewarez
    Middleware the set's the X-Request-Id header for each response
    to a random uuid4 (hex) value if the header isn't already
    present in the request, otherwise use the provided request id.
    r   r    Nc                 C   s
   || _ d S N)r   )r,   r   r(   r(   r)   r-   W   s   
zXRequestIdMiddleware.__init__r?   r@   rA   c                    sJ   |d dvr|  ||S t|d dtdd f fdd}|  |||S )NrB   rC   rJ   messager    c                    sL   | d dkrt | d d} dt j}|d| | I dH  dS )zx
            Custom send function to mutate the response headers
            and append X-Request-Id to it.
            rB   zhttp.response.startr.   )rawzX-Request-IdN)r   r2   uuiduuid4hexappend)r^   response_headers
request_idrequest_headersrA   r(   r)   send_with_request_ida   s   z;XRequestIdMiddleware.__call__.<locals>.send_with_request_id)r   r   r   )r,   r?   r@   rA   rh   r(   rf   r)   rT   Z   s
   
zXRequestIdMiddleware.__call__)rU   rV   rW   rX   r   r-   r   r   r   r   rT   r(   r(   r(   r)   r\   P   s    "r\   log_config_filer    c              
   C   sx   | sd S zt | }t|W  d    W S 1 sw   Y  W d S  ty; } ztd| | W Y d }~d S d }~ww )Nz0Failed to load log config from file %s: error %s)openjsonload	Exceptionloggerwarning)ri   fer(   r(   r)   load_log_configo   s   
(rr   argsc                 C   sP   t | j}|dur|S | jr&ddlm} dd | jdD }||| jdS dS )a#  
    Get the uvicorn log config based on the provided arguments.

    Priority:
    1. If log_config_file is specified, use it
    2. If disable_access_log_for_endpoints is specified, create a config with
       the access log filter
    3. Otherwise, return None (use uvicorn defaults)
    Nr   )create_uvicorn_log_configc                 S   s   g | ]
}|  r|  qS r(   )strip)r&   pr(   r(   r)   r*      s    z*get_uvicorn_log_config.<locals>.<listcomp>,)excluded_paths	log_level)rr   ri    disable_access_log_for_endpointsvllm.logging_utilsrt   splituvicorn_log_level)rs   
log_configrt   rx   r(   r(   r)   get_uvicorn_log_config|   s   

r   
chunk_datac                 C   s&  zUddl m} ddlm} | ddkr1|| }|jr+|jd jjr.|jd jjW S W dS W dS | ddkrM|| }|jrP|jd j	rS|jd j	W S W dS W dS W dS  t
jy   d| v r| d r| d d }d|v r~|d d	r~|d d	  Y S |d
r|d
  Y S Y dS Y dS Y dS w )z0Extract content from a streaming response chunk.r   )ChatCompletionStreamResponse)CompletionStreamResponseobjectzchat.completion.chunktext_completionchoicesdeltarM   textrI   )0vllm.entrypoints.openai.chat_completion.protocolr   +vllm.entrypoints.openai.completion.protocolr   r2   model_validater   r   rM   r   pydanticValidationError)r   r   r   chat_responsecompletion_responsechoicer(   r(   r)   _extract_content_from_chunk   sF   




r   c                   @   s`   e Zd ZdZdd Zdedee fddZdede	fd	d
Z
de	ddfddZde	fddZdS )
SSEDecoderz:Robust Server-Sent Events decoder for streaming responses.c                 C   s   d| _ g | _d S )NrI   )buffercontent_bufferr,   r(   r(   r)   r-      s   
zSSEDecoder.__init__chunkr    c                 C   s   ddl }z|d}W n ty   g  Y S w |  j|7  _g }d| jv rn| jdd\}| _|d}|dri|dd  }|d	krM|d
di n|riz|	|}|d|d W n
 |j
yh   Y q w d| jv s%|S )z4Decode a chunk of SSE data and return parsed events.r   Nr!   
   zdata:    z[DONE]rB   donedata)rB   r   )rk   decodeUnicodeDecodeErrorr   r|   rstriprQ   ru   rc   loadsJSONDecodeError)r,   r   rk   	chunk_streventslinedata_str
event_datar(   r(   r)   decode_chunk   s0   




zSSEDecoder.decode_chunkr   c                 C   s   t |S )z Extract content from event data.)r   )r,   r   r(   r(   r)   extract_content   s   zSSEDecoder.extract_contentrM   Nc                 C   s   |r
| j | dS dS )zAdd content to the buffer.N)r   rc   )r,   rM   r(   r(   r)   add_content   s   zSSEDecoder.add_contentc                 C   s   d | jS )z"Get the complete buffered content.rI   )joinr   r   r(   r(   r)   get_complete_content   s   zSSEDecoder.get_complete_content)rU   rV   rW   rX   r-   bytesrY   dictr   rZ   r   r   r   r(   r(   r(   r)   r      s     r   response_bodyc                    sF   ddl m} t d  fdd}|| | _tdt dS )z/Log streaming response with robust SSE parsing.r   r   c                  3   s    D ]V}  d7  | V   | }|D ]E}|d dkr(|d }| q|d dkrX }|rNt|dkrC|d d d }	 td|    d S td    d S qqd S )	Nr   rB   r   r   i   rI   z9response_body={streaming_complete: content=%r, chunks=%d}z9response_body={streaming_complete: no_content, chunks=%d})r   r   r   r   lenrn   info)r   r   eventrM   full_contentchunk_countr   sse_decoderr(   r)   buffered_iterator   s:   

z2_log_streaming_response.<locals>.buffered_iteratorz,response_body={streaming_started: chunks=%d}N)starlette.concurrencyr   r   body_iteratorrn   r   r   )rS   r   r   r   r(   r   r)   _log_streaming_response   s   "r   c                 C   s>   z| d   }td| W dS  ty   td Y dS w )zLog non-streaming response.r   zresponse_body={%s}zresponse_body={<binary_data>}N)r   rn   r   r   )r   decoded_bodyr(   r(   r)   _log_non_streaming_response  s   r   requestc                    sz   || I d H }dd |j 2 I d H }tt||_ |jdd}|dk}|s.td |S |r7t|| |S t| |S )Nc                    s   g | z3 d H W }|q6 S r]   r(   )r&   sectionr(   r(   r)   r*   (  s    z log_response.<locals>.<listcomp>zcontent-typerI   z text/event-stream; charset=utf-8zresponse_body={<empty>})	r   r   iterr.   r2   rn   r   r   r   )r   	call_nextrS   r   content_typeis_streamingr(   r(   r)   log_response&  s   

r   r9   excc                    s8   t tt|jt|jj|jdd}t| |jdS )N)r^   rB   coderK   rN   )	r   r   r   detailr   rN   phraser
   
model_dump)r9   r   errr(   r(   r)   http_exception_handler8  s   
r   c           
         s   d }|  }|D ]}d|v r%d|d v r%|d d }t|tr%|j} nq	t|}t|}|r>|r>||kr>| d| }n|}ttt|tj	j
tj	|dd}	t|	 tj	dS )NctxrK   r0   )r^   rB   r   r:   r   r   )errors
isinstancer   	parameterrZ   r   r   r   r   BAD_REQUESTr   r
   r   )
r9   r   r:   r   rK   	ctx_errorexc_str
errors_strr^   r   r(   r(   r)   validation_exception_handlerC  s0   
r   _running_tasksr   c                   s   zB| j jr"| j j  fdd}t| }t| |tj nd }t	  zd V  W |d ur4|
  n
|d ur>|
  w w W | ` d S | ` w )Nc                      s&   	 t tjI d H    I d H  qr]   )asynciosleepr   VLLM_LOG_STATS_INTERVALdo_log_statsr(   engine_clientr(   r)   
_force_logi  s
   zlifespan.<locals>._force_log)state	log_statsr   r   create_taskr   addadd_done_callbackremover   cancel)r   r   taskr(   r   r)   lifespanc  s(   

r   )Er   r"   rk   r5   r`   argparser   collections.abcr   
contextlibr   rD   r   r   fastapir   r   r   fastapi.exceptionsr	   fastapi.responsesr
   r   r   starlette.datastructuresr   r   r   starlette.typesr   r   r   r   r   vllmr   vllm.engine.protocolr   'vllm.entrypoints.openai.engine.protocolr   r   vllm.entrypoints.utilsr   vllm.exceptionsr   vllm.loggerr   vllm.utils.gc_utilsr   rn   r   r\   rZ   r   rr   r   r   r   rY   r   r   r   r   r   setr   Task__annotations__r   r(   r(   r(   r)   <module>   sL   
0!5-	