o
    wi-                     @   s   d Z ddlZddlZddlmZmZ ddlZddlZddl	Z
ddlmZ ddlmZ ddlmZmZmZmZmZmZmZmZmZmZ ddlmZmZ d	ed
eejef fddZG dd dZdS )uv  Main server holding entry-point to all the interceptors.

The arch is as follows:


         ┌───────────────────────┐
         │                       │
         │ NVIDIA Eval Factory   │
         │                       │
         └───▲──────┬────────────┘
             │      │
     returns │      │
             │      │ calls
             │      │
             │      │
         ┌───┼──────┼──────────────────────────────────────────────────┐
         │   │      ▼                                                  │
         │ AdapterServer (@ localhost:<free port>)                     │
         │                                                             │
         │   ▲      │       chain of RequestInterceptors:              │
         │   │      │       flask.Request                              │
         │   │      │       is passed on the way up                    │
         │   │      │                                                  │   ┌──────────────────────┐
         │   │ ┌────▼───────────────────────────────────────────────┐  │   │                      │
         │   │ │intcptr_1─────►intcptr_2───►...───►intcptr_N────────┼──┼───►                      │
         │   │ │                     │                              │  │   │                      │
         │   │ └─────────────────────┼──────────────────────────────┘  │   │                      │
         │   │                       │(e.g. for caching interceptors,  │   │  upstream endpoint   │
         │   │                       │ this "shortcut" will happen)    │   │   with actual model  │
         │   │                       │                                 │   │                      │
         │   │                       └─────────────┐                   │   │                      │
         │   │                                     │                   │   │                      │
         │ ┌─┼─────────────────────────────────────▼────┐              │   │                      │
         │ │intcptr'_M◄──intcptr'_2◄──...◄───intcptr'_1 ◄──────────────┼───┤                      │
         │ └────────────────────────────────────────────┘              │   └──────────────────────┘
         │                                                             │
         │              Chain of ResponseInterceptors:                 │
         │              requests.Response is passed on the way down    │
         │                                                             │
         │                                                             │
         └─────────────────────────────────────────────────────────────┘

In other words, interceptors are pieces of independent logic which should be
relatively easy to add separately.



    N)OptionalTuple)AdapterConfig)logging   )
AdapterMetadataAdapterRequestAdapterResponseEndpointInterceptorRequestInterceptorRequestLoggingInterceptorResponseInterceptorResponseLoggingInterceptorResponseReasoningInterceptorSystemMessageInterceptor)find_free_portwait_for_serveradapter_configreturnc                 C   sv   | j dur| j nt | _ tdtddfdd}tj|| fi d}|  td| j s7|  t	d| j  d	|| fS )
a  Create and start a server process, returning the process and the config.

    This makes sure that the factory function is not needing any complex serialization for
    multiprocessing.

    Args:
        api_url: The API URL the adapter will call
        adapter_config: Configuration for the adapter server

    Returns:
        Tuple of (process, adapter_config) where process is the running server process,
        and adapter_config is the configuration with port filled in.
    Nr   r   c                 S   s   t | d}|  d S )N)r   )_AdapterServerrun)r   server r   l/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/nemo/collections/llm/evaluation/adapters/server.pycreate_server_factoryo   s   
z4create_server_process.<locals>.create_server_factory)targetargskwargs	localhostz6Could not wait till adapter server is up on localhost:.)

local_portr   staticmethodr   multiprocessingProcessstartr   	terminateConnectionError)r   r   pr   r   r   create_server_processZ   s$   r(   c                   @   s   e Zd ZU dZeed< eed< ee ed< ee	 ed< e
jed< eed< defd	d
Zdededee dee dee f
ddZd"ddZg dZedejdeeeef  fddZdede
jfddZde	dededdfd d!ZdS )#r   zHMain server which serves on a local port and holds chain of interceptorsadapter_hostadapter_portrequest_interceptorsresponse_interceptorsappapi_urlr   c                 C   s   g | _ g | _tt| _| jjdddidgd| j | jjddgd| j d| _|j	d	us1J |j	| _
|| _td
|  | j|j|j|j|j|jd d	S )z
        Initializes the app, creates server and adds interceptors

        Args:
            adapter_config: should be obtained from the evaluation script, see `api.py`
        /path POST)defaultsmethodsz/<path:path>)r4   r   Nz$Using the following adapter config: )use_reasoningend_reasoning_tokencustom_system_promptmax_logged_requestsmax_logged_responses)r+   r,   flaskFlask__name__r-   route_handlerr)   r    r*   r   r   info_build_interceptor_chainsr5   r6   r7   r8   r9   )selfr   r   r   r   __init__   s"   
z_AdapterServer.__init__r5   r6   r7   r8   r9   c                 C   sn   |r| j t|d | j t|d | j t| jjd | jt|d |r5| jt	|d d S d S )N)new_system_message)max_requests)r.   )max_responses)r6   )
r+   appendr   r   r
   r   r.   r,   r   r   )rA   r5   r6   r7   r8   r9   r   r   r   r@      s   	z(_AdapterServer._build_interceptor_chainsr   Nc                 C   sV   t d| j d| j d tdt j tj	j
| j| j| jdd t d dS )	zStart the Flask server.z*Starting the evaluation adapter server at :z...werkzeugT)hostnameportapplicationthreadedz!Evaluation adapter server startedN)r   r?   r)   r*   _original_logging	getLoggersetLevelERRORrH   serving
run_simpler-   )rA   r   r   r   r      s   z_AdapterServer.run)zcontent-encodingzcontent-lengthztransfer-encoding
connectionresponsec                    s    fdd|j  D S )z1Process response headers, removing excluded ones.c                    s&   g | ]\}}|   jvr||fqS r   )lower_EXCLUDED_HEADERS).0kvclsr   r   
<listcomp>   s   & z<_AdapterServer._process_response_headers.<locals>.<listcomp>)headersitems)r[   rT   r   rZ   r   _process_response_headers   s   z(_AdapterServer._process_response_headersr0   c                 C   s   t tjt d}d }| jD ]}||}t|tr|} nt|t r$|}q|d us-J d| jD ]}z|	|}W q0 t
yM } z| |||  d }~ww tj|jj|jj| |jdS )N)rmetaz%There should be a response to process)rT   statusr]   )r   r:   requestr   r+   intercept_request
isinstancer	   r,   intercept_response	Exception_log_response_interceptor_errorResponser`   contentstatus_coder_   )rA   r0   adapter_requestadapter_responseinterceptoroutputer   r   r   r>      s6   





z_AdapterServer._handlerrn   rm   errorc                 C   sP   d|j j d|jj d|jj d|jjjddd dt| d	}t	| d S )
Nu3   ❌ Error in Response Interceptor ❌
Interceptor: z
Adapter Response Status Code: z
Adapter Response Status Text: z
Adapter Response Content: zutf-8ignore)errorsz
Error Details: 
)
	__class__r<   r`   rk   reasonrj   decodereprr   rq   )rA   rn   rm   rq   error_messager   r   r   rh      s   z._AdapterServer._log_response_interceptor_error)r   N)r<   
__module____qualname____doc__str__annotations__intlistr   r   r:   r;   r   rB   boolr   r@   r   rV   classmethodrequestsri   tupler_   r>   r	   rg   rh   r   r   r   r   r      sH   
 

!

"r   )r|   r   rM   r"   typingr   r   r:   r   werkzeug.servingrH   #nemo.collections.llm.evaluation.apir   
nemo.utilsinterceptorsr   r   r	   r
   r   r   r   r   r   r   utilsr   r   r#   r(   r   r   r   r   r   <module>   s"   10
*