o
    پi'                     @  s   d dl mZ d dlZd dlZd dlZd dlZd dlmZmZ d dl	m
Z
mZmZmZmZmZ d dlZd dlmZmZ d dlmZmZ d dlmZ d dlmZmZ d d	lmZmZ d d
lm Z  e
rfd dl!m"Z" e#e$Z%G dd deZ&dS )    )annotationsN)ABCabstractmethod)TYPE_CHECKINGAnyListOptionalTupleUnion)HTTPExceptionRequest)ORJSONResponseStreamingResponse)DS32EncodingError)ErrorResponseOpenAIServingRequest)EmbeddingReqInputGenerateReqInput)
ServerArgs)TokenizerManagerc                   @  s   e Zd ZdZd@ddZdAd
dZdBddZdCddZedDddZ	dEddZ
dEddZe		dFdGd#d$ZdHd(d)ZdId+d,ZdJd.d/Z	0	1	dKdLd8d9Z	0	1dMdNd:d;Zd<d= Zd>d? ZdS )OOpenAIServingBasez0Abstract base class for OpenAI endpoint handlerstokenizer_managerr   c                 C  s<   || _ t| j jtr| j jjrt| j jj| _d S d | _d S )N)r   
isinstanceserver_argsr   'tokenizer_metrics_allowed_custom_labelssetallowed_custom_labels)selfr    r   ^/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/entrypoints/openai/serving_base.py__init__   s   zOpenAIServingBase.__init__modelstrreturnTuple[str, Optional[str]]c                 C  s@   d|vr|dfS | dd}|d  }|d  pd}||fS )zParse 'base-model:adapter-name' syntax to extract LoRA adapter.

        Returns (base_model, adapter_name) or (model, None) if no colon present.
        :N   r   )splitstrip)r   r!   parts
base_modeladapter_namer   r   r   _parse_model_parameter(   s   z(OpenAIServingBase._parse_model_parameterrequest_modelexplicit_lora_path)Optional[Union[str, List[Optional[str]]]]c                 C  s   |  |\}}|dur|S |S )zResolve LoRA adapter with priority: model parameter > explicit lora_path.

        Returns adapter name or None. Supports both single values and lists (batches).
        N)r,   )r   r-   r.   _adapter_from_modelr   r   r   _resolve_lora_path7   s   	z$OpenAIServingBase._resolve_lora_pathrequestr   raw_requestr   ,Union[Any, StreamingResponse, ErrorResponse]c              
     s  t   }t  }zMt  }| |}t  | }|r!| |W S | ||\}}	t|ttfr9||_||_	||_
t|drL|jrL| ||	|I dH W S | ||	|I dH W S  tyu }
 z| j|
jt|
j|
jdW  Y d}
~
S d}
~
w ty }
 z| jt|
dddW  Y d}
~
S d}
~
w ty }
 ztd|
  | jt|
dddW  Y d}
~
S d}
~
w ty }
 ztd|
  | jdt|
 d	d
dW  Y d}
~
S d}
~
ww )zHandle the specific request type with common pattern
        If you want to override this method, you should be careful to record the validation time.
        streamNmessageerr_typestatus_code
BadRequest  zDS32EncodingError: Error in request: zInternal server error: InternalServerErrori  )timeperf_counter_validate_requestcreate_error_response_convert_to_internal_requestr   r   r   validation_timereceived_timereceived_time_perfhasattrr6   _handle_streaming_request_handle_non_streaming_requestr   detailr"   r:   
ValueErrorr   loggerinfo	Exception	exception)r   r3   r4   rE   rF   validation_start	error_msgrD   adapted_requestprocessed_requester   r   r   handle_requestI   sh   
z OpenAIServingBase.handle_requestc                 C     dS z)Generate request ID based on request typeNr   r   r   r   r   _request_id_prefix   s   z$OpenAIServingBase._request_id_prefixOptional[str]c                 C  rV   rW   )getattrrY   uuiduuid4hex)r   r3   ridr   r   r   _generate_request_id_base      z+OpenAIServingBase._generate_request_id_basec                 C  s^   g }dD ]!}t ||d}|r%t|ts td| dt|j || q|r-d|S dS )z[Compute the final extra_key by concatenating cache_salt and extra_key if both are provided.)
cache_salt	extra_keyNz	Value of z must be a string, but got  )r[   r   r"   	TypeErrortype__name__appendjoin)r   r3   r)   keyvaluer   r   r   _compute_extra_key   s   

z$OpenAIServingBase._compute_extra_keyNrD   float-tuple[GenerateReqInput, OpenAIServingRequest]c                 C  rV   )z)Convert OpenAI request to internal formatNr   )r   r3   r4   rD   r   r   r   rC      s   z.OpenAIServingBase._convert_to_internal_requestrR   r   7Union[StreamingResponse, ErrorResponse, ORJSONResponse]c                      | j | jj ddddS )zqHandle streaming request

        Override this method in child classes that support streaming requests.
        z$ does not support streaming requestsNotImplementedError  r7   rB   	__class__rg   r   rR   r3   r4   r   r   r   rH         
z+OpenAIServingBase._handle_streaming_request)Union[Any, ErrorResponse, ORJSONResponse]c                   rp   )zyHandle non-streaming request

        Override this method in child classes that support non-streaming requests.
        z( does not support non-streaming requestsrq   rr   r7   rs   ru   r   r   r   rI      rv   z/OpenAIServingBase._handle_non_streaming_requestr0   c                 C  rV   )zValidate requestNr   )r   r0   r   r   r   rA      ra   z#OpenAIServingBase._validate_requestBadRequestErrorr<   r8   r9   r:   intparamr   c                 C  s"   t d||||d}t| |dS )zCreate an error responseerrorobjectr8   rf   rz   code)contentr:   )r   r   
model_dump)r   r8   r9   r:   rz   r{   r   r   r   rB      s   	z'OpenAIServingBase.create_error_responsec                 C  s$   t d||d|d}td| iS )z!Create a streaming error responser{   Nr|   )r   jsondumpsr   )r   r8   r9   r:   r{   r   r   r   create_streaming_error_response   s   z1OpenAIServingBase.create_streaming_error_responsec              
     s    j r jjjs
d S d } jjj}z|r#|j|r#t|j|nd }W n tj	yC } zt
d|  d }W Y d }~nd }~ww t|trT fdd| D }|S )Nr=   c                   s    i | ]\}}| j v r||qS r   )r   ).0labelrk   rX   r   r   
<dictcomp>
  s
    
z;OpenAIServingBase.extract_custom_labels.<locals>.<dictcomp>)r   r   r   &tokenizer_metrics_custom_labels_headerheadersgetorjsonloadsr   JSONDecodeErrorrL   rO   r   dictitems)r   r4   custom_labelsheader
raw_labelsrT   r   rX   r   extract_custom_labels   s2   


z'OpenAIServingBase.extract_custom_labelsc                 C  s   |d u rd S |j dS )Nzx-smg-routing-key)r   r   )r   r4   r   r   r   extract_routing_key  s   z%OpenAIServingBase.extract_routing_key)r   r   )r!   r"   r#   r$   )r-   r"   r.   r/   r#   r/   )r3   r   r4   r   r#   r5   )r#   r"   )r3   r   r#   rZ   )NN)r3   r   r4   r   rD   rm   r#   rn   )rR   r   r3   r   r4   r   r#   ro   )rR   r   r3   r   r4   r   r#   rw   )r0   r   r#   rZ   )rx   r<   N)
r8   r"   r9   r"   r:   ry   rz   rZ   r#   r   )rx   r<   )r8   r"   r9   r"   r:   ry   r#   r"   )rg   
__module____qualname____doc__r    r,   r2   rU   r   rY   r`   rl   rC   rH   rI   rA   rB   r   r   r   r   r   r   r   r      s4    



>


	

r   )'
__future__r   r   loggingr?   r\   abcr   r   typingr   r   r   r   r	   r
   r   fastapir   r   fastapi.responsesr   r   ,sglang.srt.entrypoints.openai.encoding_dsv32r   &sglang.srt.entrypoints.openai.protocolr   r   sglang.srt.managers.io_structr   r   sglang.srt.server_argsr   %sglang.srt.managers.tokenizer_managerr   	getLoggerrg   rL   r   r   r   r   r   <module>   s$     
