o
    پi2                     @   s   d Z ddlZddlmZmZ ddlmZmZ ddlZddlm	Z	 ddl
mZ ddlmZmZmZmZmZmZmZmZmZmZ ddlmZ G d	d
 d
ZdS )z
Ollama-compatible API serving handlers.

This module provides handlers that convert Ollama API requests to SGLang's
internal format and return Ollama-compatible responses.
    N)datetimetimezone)AsyncIteratorUnion)Request)StreamingResponse)
OllamaChatRequestOllamaChatResponseOllamaChatStreamResponseOllamaGenerateRequestOllamaGenerateResponseOllamaGenerateStreamResponseOllamaMessageOllamaModelInfoOllamaShowResponseOllamaTagsResponse)GenerateReqInputc                   @   s   e Zd ZdZdd ZdefddZd dedefd	d
Zde	de
deeef fddZdede
dedefddZdede
dedefddZdede
deeef fddZdede
dedefddZdede
dedefddZdefddZdedefddZdS )!OllamaServingz,Handler for Ollama-compatible API endpoints.c                 C   s
   || _ d S N)tokenizer_manager)selfr    r   Y/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/entrypoints/ollama/serving.py__init__"   s   
zOllamaServing.__init__returnc                 C   s   t tjddd d S )z'Get current timestamp in Ollama format.z%Y-%m-%dT%H:%M:%S.%fNZ)r   nowr   utcstrftime)r   r   r   r   _get_timestamp%   s   zOllamaServing._get_timestampNoptionsc              	   C   sX   i }|r"ddddddddd	}|  D ]\}}||v r!|| ||< qd|vr*d
|d< |S )z1Convert Ollama options to SGLang sampling params.temperaturetop_ptop_kmax_new_tokensstoppresence_penaltyfrequency_penaltyseed)r"   r#   r$   num_predictr&   r'   r(   r)   i   )items)r   r!   sampling_paramsparam_mappingollama_paramsglang_paramr   r   r   #_convert_options_to_sampling_params)   s$   
z1OllamaServing._convert_options_to_sampling_paramsrequestraw_requestc                    sx   | j j}dd |jD }| j jj|ddd}| |j}t|||jd}|jr2| 	|||I dH S | 
|||I dH S )zHandle /api/chat endpoint.c                 S   s   g | ]	}|j |jd qS )rolecontentr3   ).0msgr   r   r   
<listcomp>K   s    z-OllamaServing.handle_chat.<locals>.<listcomp>T)tokenizeadd_generation_prompt)	input_idsr,   streamN)r   served_model_namemessages	tokenizerapply_chat_templater0   r!   r   r<   _stream_chat_response_generate_chat_response)r   r1   r2   
model_namer>   
prompt_idsr,   gen_requestr   r   r   handle_chatD   s.   

zOllamaServing.handle_chatrE   rC   c           	         s   t  }| j|| I dH }t  }|| }|dd}t||  td|ddd||di d	d|di d
ddS )z%Generate non-streaming chat response.Ntext 	assistantr3   Tr&   	meta_infoprompt_tokenscompletion_tokens)model
created_atmessagedonedone_reasontotal_durationprompt_eval_count
eval_count)	timetime_nsr   generate_request	__anext__getr	   r    r   	r   rE   r2   rC   
start_timeresponseend_timerR   output_textr   r   r   rB   i   s(   
z%OllamaServing._generate_chat_responsec                    ,   dt t f fdd}t| ddS )z!Generate streaming chat response.r   c                    s   d} j  2 zK3 d H W }|dd}|di dd u}|t| d  }|} |r=t tdddddd	}nt td|dd
d}t|	 d V  q
6 d S )NrH   rG   rJ   finish_reasonrI   r3   Tr&   )rM   rN   rO   rP   rQ   F)rM   rN   rO   rP      
)
r   rW   rY   lenr
   r    r   orjsondumps
model_dumpprevious_textchunkrG   is_donedeltar\   rE   rC   r2   r   r   r   generate_stream   s2   

z<OllamaServing._stream_chat_response.<locals>.generate_streamapplication/x-ndjson
media_typer   bytesr   r   rE   r2   rC   rl   r   rk   r   rA      s   z#OllamaServing._stream_chat_responsec                    s   | j j}|j}|jr|j d| }|r| s;t||  dddd |jr9dtt	 f fdd}t
| d	d
S  S | |j}t|||jd}|jrV| |||I dH S | |||I dH S )zHandle /api/generate endpoint.z

rH   Tr&   rM   rN   r\   rP   rQ   r   c                     s   t   d V  d S )Nra   )rc   rd   re   r   empty_responser   r   empty_stream   s   z3OllamaServing.handle_generate.<locals>.empty_streamrm   rn   )rG   r,   r<   N)r   r=   promptsystemstripr   r    r<   r   rq   r   r0   r!   r   _stream_generate_response_generate_generate_response)r   r1   r2   rC   rw   rv   r,   rE   r   rt   r   handle_generate   sB   

zOllamaServing.handle_generatec           	         sz   t  }| j|| I dH }t  }|| }|dd}t||  |dd||di dd|di ddd	S )
z)Generate non-streaming generate response.NrG   rH   Tr&   rJ   rK   rL   )rM   rN   r\   rP   rQ   rR   rS   rT   )rU   rV   r   rW   rX   rY   r   r    rZ   r   r   r   r{      s(   z)OllamaServing._generate_generate_responsec                    r_   )z%Generate streaming generate response.r   c                    s   d} j  2 zC3 d H W }|dd}|di dd u}|t| d  }|} |r9t dddd}n
t |dd	}t| d
 V  q
6 d S )NrH   rG   rJ   r`   Tr&   rs   F)rM   rN   r\   rP   ra   )	r   rW   rY   rb   r   r    rc   rd   re   rf   rk   r   r   rl      s2   z@OllamaServing._stream_generate_response.<locals>.generate_streamrm   rn   rp   rr   r   rk   r   rz      s   z'OllamaServing._stream_generate_responsec              
   C   sJ   | j j}t|||  dddd|v r|dd n|ddd}t|gd	S )
z2Handle /api/tags endpoint - list available models.r   zGsha256:sglang0000000000000000000000000000000000000000000000000000000000sglang/unknown)formatfamilyparameter_size)namerM   modified_atsizedigestdetails)models)r   r=   r   r    splitr   )r   rC   
model_infor   r   r   get_tags!  s   zOllamaServing.get_tagsrM   c                 C   s   | j j}d|v r|dd n|}dD ]}||r%|dt|  } nq|r+|jnd}tdd| d| d	d
| d|  dd||gdddd|d|dd| d|| dd| dd| ddidgdS )z3Handle /api/show endpoint - show model information.r~   r   )z	-Instructz-Chatz-BaseNi   rH   zFROM z
PARAMETER num_ctx 
znum_ctx r}   r   )parent_modelr   r   familiesr   quantization_levelzgeneral.architecturezgeneral.namezgeneral.parameter_countr   z.context_lengthz.block_countz.embedding_lengthz.attention.head_count
completion)license	modelfile
parameterstemplater   r   r   capabilities)r   model_configr   endswithrb   context_lenr   r    )r   rM   r   model_familysuffixr   r   r   r   get_show6  s>   
	



	zOllamaServing.get_showr   )__name__
__module____qualname____doc__r   strr    dictr0   r   r   r   r	   r   rF   r   rB   rA   r   r   r|   r{   rz   r   r   r   r   r   r   r   r   r      sr    

%

)

2

(r   )r   rU   r   r   typingr   r   rc   fastapir   fastapi.responsesr   &sglang.srt.entrypoints.ollama.protocolr   r	   r
   r   r   r   r   r   r   r   sglang.srt.managers.io_structr   r   r   r   r   r   <module>   s    0