o
    پi2&                     @   sD   d Z ddlmZ ddlZG dd dZdd Zedkr e  dS dS )	u   
Smart Router: Automatically routes requests between local Ollama and remote SGLang.

Uses an LLM judge to classify tasks as simple or complex, then routes accordingly:
- Simple tasks → Local Ollama (fast response)
- Complex tasks → Remote SGLang (powerful model)

Usage:
    from sglang.srt.entrypoints.ollama.smart_router import SmartRouter

    router = SmartRouter(
        local_host="http://localhost:11434",
        remote_host="http://sglang-server:30001",
    )
    response = router.chat("Hello!")
    )OptionalNc                   @   s   e Zd ZdZdZ						dded	ed
ededee dee fddZ	d dedede	eef fddZ
d dedede	eef fddZ				d!dedee dedededefddZ				d!dedee dededef
ddZdS )"SmartRouterzVRoutes requests between local Ollama and remote SGLang using LLM-based classification.a  You are a task classifier. Classify the following user request into one of two categories.

Categories:
- SIMPLE: Quick responses, greetings, factual questions, definitions, translations, basic Q&A
- COMPLEX: Tasks requiring deep reasoning, multi-step analysis, long explanations, creative writing, detailed research

Reply with ONLY one word: either SIMPLE or COMPLEX.

User request: "{prompt}"

Category:http://localhost:11434http://localhost:30001llama3.2Qwen/Qwen2.5-1.5B-InstructN
local_hostremote_hostlocal_modelremote_modeljudge_model
judge_hostc                 C   sP   t j|d| _t j|d| _|| _|| _|p|| _|p|| _t j| jd| _dS )a  
        Initialize the smart router.

        Args:
            local_host: URL of local Ollama server
            remote_host: URL of remote SGLang server
            local_model: Model name for local Ollama
            remote_model: Model name for remote SGLang
            judge_model: Model for LLM-based classification (default: same as local_model)
            judge_host: Host for judge model (default: same as local_host)
        )hostN)	ollamaClientlocal_clientremote_clientr
   r   r   r   judge_client)selfr   r	   r
   r   r   r    r   ^/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/entrypoints/ollama/smart_router.py__init__'   s   

zSmartRouter.__init__Fpromptverbosereturnc              
   C   s   z9| j j|dd d}| jj| jd|dgdddd	}|d
 d   }|r0td|  d|v r7W dS W dS  tyV } z|rKtd| d W Y d}~dS d}~ww )zm
        Use LLM to classify the prompt.

        Returns:
            Tuple of (use_remote, reason)
        Ni  )r   userrolecontentr   
   )temperaturenum_predict)modelmessagesoptionsmessager   z[Router] LLM Judge: COMPLEX)TzComplex task)FzSimple taskz[Router] LLM Judge failed: z, defaulting to local)Fz!Judge failed, defaulting to local)	CLASSIFICATION_PROMPTformatr   chatr   stripupperprint	Exception)r   r   r   classification_promptresponseresulter   r   r   _classify_with_llmE   s*   	

zSmartRouter._classify_with_llmc                 C   s   |  ||S )z
        Determine if the prompt should be routed to remote SGLang.

        Args:
            prompt: User's input prompt
            verbose: Print debug information

        Returns:
            Tuple of (should_use_remote, reason)
        )r2   )r   r   r   r   r   r   should_use_remoteh   s   zSmartRouter.should_use_remoter#   force_localforce_remotec              
   C   sp  |du rd|dg}|}nd}t |D ]}|ddkr$|dd} nq|r,d\}}	n|r3d\}}	n| ||\}}	|rF| j}
| j}d	}n| j}
| j}d
}|rZtd| d|  z|
j||d}|d d |||	dW S  t	y } z>|rtd| d| d |s| jn| j}|s| jn| j}|sd	nd
}|j||d}|d d ||d| dW  Y d}~S d}~ww )a  
        Route the request and get response.

        Args:
            prompt: User's input (used if messages is None)
            messages: Full message history (overrides prompt if provided)
            verbose: Print routing decision
            force_local: Force use of local model
            force_remote: Force use of remote model

        Returns:
            Response dict with 'content', 'model', 'location', 'reason' keys
        Nr   r    r   r   TzForced remoteFzForced localRemote SGLangLocal Ollama[Router] -> 
 | Model: )r"   r#   r%   )r   r"   locationreasonz	[Router] z	 failed: z, falling back...zFallback from )
reversedgetr3   r   r   r   r
   r,   r)   r-   )r   r   r#   r   r4   r5   check_promptmsg
use_remoter>   clientr"   r=   r/   r1   fallback_clientfallback_modelfallback_locationr   r   r   r)   u   sZ   



zSmartRouter.chatc                 c   s    |du rd|dg}|}nd}t |D ]}|ddkr%|dd} nq|r-d\}}	n|r4d\}}	n| ||\}}	|rG| j}
| j}d	}n| j}
| j}d
}|r[td| d|  |
j||ddD ]}|V  qcdS )ze
        Route the request and stream response.

        Yields:
            Response chunks
        Nr   r   r6   r   r   r7   r8   r9   r:   r;   r<   T)r"   r#   stream)	r?   r@   r3   r   r   r   r
   r,   r)   )r   r   r#   r   r4   r5   rA   rB   rC   r>   rD   r"   r=   chunkr   r   r   chat_stream   s6   

zSmartRouter.chat_stream)r   r   r   r   NN)F)NFFF)__name__
__module____qualname____doc__r'   strr   r   booltupler2   r3   listdictr)   rJ   r   r   r   r   r      s~    


 #
Sr   c               
   C   sX  t d t d t d t d t d t d t d t d tdd	d
dd} g }	 zXtd }| dv r?t d W dS |sCW q*|d|d t dddd d}| j||ddD ]}|di dd}|rut |ddd ||7 }q\t d |d|d W n' ty   t d Y dS  t	y } zt d| d W Y d}~nd}~ww q+)z%Interactive demo of the smart router.z<============================================================z,Smart Router: Local Ollama <-> Remote SGLangz
Routing strategy:z8  LLM Judge classifies each request as SIMPLE or COMPLEXz'  - SIMPLE tasks -> Local Ollama (fast)z-  - COMPLEX tasks -> Remote SGLang (powerful)z
Type 'quit' to exit
r   r   r   r   )r   r	   r
   r   TzYou: )quitexitqzGoodbye!r   r   z
Assistant: r6   )endflush)r   r#   r   r%   r   
	assistantz	
Goodbye!zError: N)
r,   r   inputr*   lowerappendrJ   r@   KeyboardInterruptr-   )routerr#   
user_inputfull_responserI   r   r1   r   r   r   main   sX   
rb   __main__)rN   typingr   r   r   rb   rK   r   r   r   r   <module>   s     ^3
