o
    پi(U                     @  s  d dl mZ d dlZd dlZd dlmZmZmZmZm	Z	m
Z
mZ d dlmZ d dlmZmZ d dlmZmZmZmZmZmZmZ d dlmZ d dlmZ d d	lmZmZm Z m!Z! d d
l"m#Z# d dl$m%Z% d dl&m'Z' erxd dl(m)Z) d dl*m+Z+ e,e-Z.G dd deZ/dS )    )annotationsN)TYPE_CHECKINGAnyAsyncGeneratorDictListOptionalUnion)Request)ORJSONResponseStreamingResponse)CompletionRequestCompletionResponseCompletionResponseChoiceCompletionResponseStreamChoiceCompletionStreamResponseErrorResponseSglExt)OpenAIServingBase)UsageProcessor)&process_cached_tokens_details_from_retprocess_hidden_states_from_retprocess_routed_experts_from_retto_openai_style_logprobs)GenerateReqInput)'generate_completion_prompt_from_request)convert_json_schema_to_str)TemplateManager)TokenizerManagerc                      s   e Zd ZdZd2 fddZd3d
dZd4ddZ	d5d6ddZd7ddZd8ddZ	d9d d!Z
d:d#d$Zd;d*d+Zd<d-d.Zd=d0d1Z  ZS )>OpenAIServingCompletionz#Handler for /v1/completion requeststokenizer_managerr   template_managerr   c                   s   t  | || _d S N)super__init__r!   )selfr    r!   	__class__ e/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/entrypoints/openai/serving_completions.pyr$   +   s   
z OpenAIServingCompletion.__init__returnstrc                 C  s   dS )Nzcmpl-r(   r%   r(   r(   r)   _request_id_prefix3   s   z*OpenAIServingCompletion._request_id_prefixrequestr   Optional[str]c                 C  s.   |j }|rt|trtdd |D rdS dS )z!Validate that the input is valid.c                 s  s    | ]}| V  qd S r"   r(   ).0pr(   r(   r)   	<genexpr>9   s    z<OpenAIServingCompletion._validate_request.<locals>.<genexpr>zPrompt cannot be emptyN)prompt
isinstancelistall)r%   r.   r3   r(   r(   r)   _validate_request6   s    z)OpenAIServingCompletion._validate_requestNraw_requestr
   *tuple[GenerateReqInput, CompletionRequest]c           
      C  s  |j r|jrtd |j}| jjdurt|}|j r!|jr!d}nd}| |}t	|t
s9t	|tr>t	|d t
r>d|i}nd|i}| |}| |j|j}tdi |i d|d|jdud	|jdurg|jnIdd
|ddd|jd|d|jd|jd|jd|jd|jd|jd|jd| |d|jd| |d|d|j}	|	|fS d
|ddd|jd|d|jd|jd|jd|jd|jd|jd|jd| |d|jd| |d|d|j}	|	|fS )z4Convert OpenAI completion request to internal formatzoEcho is not compatible with logprobs. To compute logprobs of input prompt, please use the native /generate API.Nr   text	input_idssampling_paramsreturn_logprobtop_logprobs_numlogprob_start_lenreturn_text_in_logprobsTstream	lora_pathbootstrap_hostbootstrap_portbootstrap_roomdata_parallel_rankreturn_hidden_statesreturn_routed_expertsrid	extra_keypriorityrouting_keycustom_labelscustom_logit_processorr(   )echologprobsloggerwarningr3   r!   completion_template_namer   _build_sampling_paramsr4   r+   r5   extract_custom_labels_resolve_lora_pathmodelrC   r   rB   rD   rE   rF   rG   rH   rI   rJ   _compute_extra_keyrL   extract_routing_keyrO   )
r%   r.   r8   r3   r@   r=   prompt_kwargsrN   rC   adapted_requestr(   r(   r)   _convert_to_internal_request>   s   




	


	


z4OpenAIServingCompletion._convert_to_internal_requestDict[str, Any]c                 C  s  i d|j d|jd|jd|jd|jd|jd|jd|jd	|jd
|j	d|j
d|jd|jd|jd|jd|jd|j|j|j|j|j|jd}|jrg|jjdkrgt|jjj|d< |S |jrv|jjdkrvd|d< |S |jr|jjdkrt|jjdd|d< |S )z)Build sampling parameters for the requesttemperaturemax_new_tokensmin_new_tokensstopstop_token_ids
stop_regextop_ptop_kmin_ppresence_penaltyfrequency_penaltyrepetition_penaltyregexjson_schemaebnfnno_stop_trim)
ignore_eosskip_special_tokens
logit_biascustom_paramssampling_seedjson_objectz{"type": "object"}structural_tagT)by_alias)r_   
max_tokens
min_tokensrb   rc   rd   re   rf   rg   rh   ri   rj   rk   rl   rm   rn   ro   rp   rq   rr   rs   seedresponse_formattyper   schema_
model_dump)r%   r.   r=   r(   r(   r)   rU      sp   	
z.OpenAIServingCompletion._build_sampling_paramsr\   r   r   c                   s"   t | |||d| j|dS )z#Handle streaming completion requestztext/event-stream)
media_type
background)r   _generate_completion_streamr    create_abort_task)r%   r\   r.   r8   r(   r(   r)   _handle_streaming_request   s   
z1OpenAIServingCompletion._handle_streaming_requestAsyncGenerator[str, None]c           &      C s   t t }i }i }i }i }i }	i }
i }z| j||2 z3 dH W }|dd}|d }|d d ||< |d d ||< |d dd|	|< |d d	d|
|< |d d
d||< ||d}|sr|jrr| ||}|| }d}|jdur|s|jr|d d }|d d }nd}d}||d}t|d d }|d d |d }|d d }t|dkr|dur|du rd}nt	||||d dg |d d}|||< |t|d }|| ||< |d d }t
||||r|d nd|rd|v r|d ndd}t|d d |d|g|jd}|jr)|jjr)tj||d||dd|_d|  dV  q6 |jrs|
rs|
 D ]3\}}|rqt|dkrQ|d ng }t|d d |dt
|d|ddg|jd}d|  dV  q?|jr|rtdd | D d}|durt|d d |dg |jt|d d!} d|   dV  |jr|jjrtj|||	|j| jjjd"}!t|d d |g |j|!d#}"|"jd$d%}#d|# dV  W n! ty }$ z| t |$}%d|% dV  W Y d}$~$nd}$~$ww d&V  dS )'z&Generate streaming completion responseNindexr   r;   	meta_infoprompt_tokenscompletion_tokenscached_tokenshidden_statesrouted_experts input_token_logprobsinput_top_logprobsoutput_token_logprobsfinish_reasonoutput_top_logprobsr   r   r   r   r|   matched)r   r;   rQ   r   matched_stopidtext_completion)r   createdobjectchoicesrX   )r   r   zdata: z

   r:   )r   r;   r   r   c                 s  s    | ]	}|d ur|V  qd S r"   r(   )r0   vr(   r(   r)   r2   J  s    zFOpenAIServingCompletion._generate_completion_stream.<locals>.<genexpr>)r   )r   r   r   r   rX   sglext	n_choicesenable_cache_report)r   r   r   rX   usageT)exclude_nonezdata: [DONE]

)!inttimer    generate_requestgetrP   _get_echo_textrQ   lenr   r   r   rX   stream_optionscontinuous_usage_statsr   calculate_token_usager   model_dump_jsonrH   itemsrI   nextvaluesr   include_usagecalculate_streaming_usagern   server_argsr   	Exceptioncreate_streaming_error_responser+   )&r%   r\   r.   r8   r   stream_buffersn_prev_tokensr   r   r   r   r   contentr   r;   stream_buffer	echo_textrQ   r   r   n_prev_tokentotal_output_logprobsoutput_logprobs_slicefinish_reason_for_logprobsdeltar   choice_datachunkchoice_hidden_stateslast_token_hidden_stateshidden_states_chunkfirst_routed_expertsrouted_experts_chunkr   final_usage_chunkfinal_usage_dataeerrorr(   r(   r)   r      s,  






b




z3OpenAIServingCompletion._generate_completion_stream8Union[CompletionResponse, ErrorResponse, ORJSONResponse]c              
     s   z| j ||}| I dH }W n ty* } z| t|W  Y d}~S d}~ww t|ts3|g}| ||t	t

 }|S )z'Handle non-streaming completion requestN)r    r   	__anext__
ValueErrorcreate_error_responser+   r4   r5   _build_completion_responser   r   )r%   r\   r.   r8   	generatorretr   responser(   r(   r)   _handle_non_streaming_requestp  s$   

z5OpenAIServingCompletion._handle_non_streaming_requestr   List[Dict[str, Any]]r   r   r   c              	   C  s  g }d}g }|j r| |}d}|d }t||}t||}	d}
|s$|	r*t||	d}
t|D ]m\}}|d }|rC||j }|| | }d}|jdurq|rY|d d }|d d	 }nd}d}t|||d 	d
g |d 	dg d}t
||}|d d }t||||r|d nd|rd|v r|d nd|d}|| q.| jjj}tj||j|d}t|d d d |j|||d|d d d i|
dS )z1Build completion response from generation resultsFTr   N)r   cached_tokens_detailsr;   r   r   r   r   r   r   r   r|   r   )r   r;   rQ   r   r   r   r   r   weight_version)r   rX   r   r   r   metadatar   )rP   _prepare_echo_promptsr   r   r   	enumeratern   rQ   r   r   r   r   appendr    r   r   r   calculate_response_usager   rX   )r%   r.   r   r   r   rP   echo_prompts	first_retr   r   response_sglextidxret_itemr;   prompt_indexrQ   r   r   r   r   r   cache_reportr   r(   r(   r)   r     s   





z2OpenAIServingCompletion._build_completion_responser   c                 C  s   t |jtr	|jS t |jtrRt |jd tr|j||j  S t |jd tr1| jjj|jddS t |jd trRt |jd d trR| jjj|j||j  ddS dS )z$Get echo text for streaming responser   Trq   r   )	r4   r3   r+   r5   rn   r   r    	tokenizerdecode)r%   r.   r   r(   r(   r)   r     s"   z&OpenAIServingCompletion._get_echo_text	List[str]c                   s   t |jtrt |jd tr|jS t |jtr)t |jd tr) fdd|jD S t |jtrBt |jd trB jjj|jddgS |jgS )z/Prepare echo prompts for non-streaming responser   c                   s   g | ]} j jj|d dqS )Tr   )r    r   r   )r0   r3   r,   r(   r)   
<listcomp>  s    zAOpenAIServingCompletion._prepare_echo_prompts.<locals>.<listcomp>Tr   )r4   r3   r5   r+   r   r    r   r   )r%   r.   r(   r,   r)   r     s   
z-OpenAIServingCompletion._prepare_echo_prompts)r    r   r!   r   )r*   r+   )r.   r   r*   r/   r"   )r.   r   r8   r
   r*   r9   )r.   r   r*   r^   )r\   r   r.   r   r8   r
   r*   r   )r\   r   r.   r   r8   r
   r*   r   )r\   r   r.   r   r8   r
   r*   r   )r.   r   r   r   r   r   r*   r   )r.   r   r   r   r*   r+   )r.   r   r*   r   )__name__
__module____qualname____doc__r$   r-   r7   r]   rU   r   r   r   r   r   r   __classcell__r(   r(   r&   r)   r   (   s    


A
,
 
9

]r   )0
__future__r   loggingr   typingr   r   r   r   r   r   r	   fastapir
   fastapi.responsesr   r   &sglang.srt.entrypoints.openai.protocolr   r   r   r   r   r   r   *sglang.srt.entrypoints.openai.serving_baser   -sglang.srt.entrypoints.openai.usage_processorr   #sglang.srt.entrypoints.openai.utilsr   r   r   r   sglang.srt.managers.io_structr   (sglang.srt.parser.code_completion_parserr   sglang.utilsr   $sglang.srt.managers.template_managerr   %sglang.srt.managers.tokenizer_managerr   	getLoggerr   rR   r   r(   r(   r(   r)   <module>   s$    $$	
