o
    پi                  	   @   sj  U d Z ddlZddlZddlZddlmZ ddlmZmZm	Z	m
Z
mZmZmZmZ ddlmZmZmZmZmZmZ ddlmZ ddlmZ ddlmZmZmZmZmZ dd	l m!Z! zdd
l"m#Z# W n   eZ#Y ddl$m%Z% e&e'Z(dZ)G dd deZ*G dd deZ+G dd deZ,G dd deZ-G dd deZ.G dd deZ/G dd deZ0G dd deZ1G dd deZ2G dd  d eZ3G d!d" d"eZ4G d#d$ d$eZ5G d%d& d&eZ6G d'd( d(eZ7G d)d* d*eZ8ee8e#f Z9ee:d+< eee!d, e9f ee!d- ef f Z;ee:d.< G d/d0 d0eZ<G d1d2 d2eZ=G d3d4 d4eZ>G d5d6 d6eZ?G d7d8 d8eZ@G d9d: d:eZAG d;d< d<eZBG d=d> d>eZCG d?d@ d@eZDG dAdB dBeZEG dCdD dDeZFG dEdF dFeZGG dGdH dHeZHG dIdJ dJeZIG dKdL dLeZJG dMdN dNeZKG dOdP dPeZLG dQdR dReZMeeGeKeLeMf ZNeeGeKeLf ZOeePe	eO f ZQG dSdT dTeZRG dUdV dVeZSG dWdX dXeZTG dYdZ dZeZUeeTeUf ZVG d[d\ d\eZWG d]d^ d^eZG d_d` d`eZXG dadb dbeZG dcdd ddeZYG dedf dfeZZG dgdh dheZ[G didj djeZ\G dkdl dleZ]G dmdn dneZ^G dodp dpeZ_G dqdr dreZ`ee	ea e	e	ea  ePe	eP e	e` f ZbG dsdt dteZcG dudv dveZdeePe	eP e	ea f ZeG dwdx dxeZfG dydz dzeZgG d{d| d|eZhG d}d~ d~eZiG dd deZjG dd deZkG dd deZlG dd deZmG dd deZnG dd deZoG dd deZpG dd deZqeeYeAecefejelenepf ZrG dd deZsG dd deZteedef Zuee:d< G dd deZvG dd deZwG dd deZxG dd deZyeG dd dZzG dd de
Z{G dd deZ|eedef Zuee:d< dS )z'Pydantic models for OpenAI API protocol    N)	dataclass)AnyDictList
NamedTupleOptionalTuple	TypeAliasUnion)ResponseFunctionToolCallResponseInputItemParamResponseOutputItemResponseOutputMessageResponseOutputTextResponseReasoningItem)
ToolChoice)Tool)	BaseModelFieldfield_validatormodel_serializermodel_validator)Literal)StructuralTag)convert_json_schema_to_strdefaultc                   @   sx   e Zd ZU dZeed< dZeed< edd dZe	ed< d	Z
eed
< dZee ed< dZee ed< dZee	 ed< dS )	ModelCardzModel cards.idmodelobjectc                   C      t t S Ninttime r%   r%   Z/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/entrypoints/openai/protocol.py<lambda>:       zModelCard.<lambda>default_factorycreatedsglangowned_byNrootparentmax_model_len)__name__
__module____qualname____doc__str__annotations__r   r   r+   r#   r-   r.   r   r/   r0   r%   r%   r%   r&   r   5   s   
 r   c                   @   s4   e Zd ZU dZdZeed< eedZ	e
e ed< dS )	ModelListz#Model list consists of model cards.listr   r)   dataN)r1   r2   r3   r4   r   r5   r6   r   r8   r9   r   r   r%   r%   r%   r&   r7   A   s   
 r7   c                   @   sB   e Zd ZU dZeed< eed< eed< dZee ed< eed< dS )ErrorResponseerrorr   messagetypeNparamcode)	r1   r2   r3   r   r5   r6   r>   r   r#   r%   r%   r%   r&   r:   H   s   
 r:   c                   @   sv   e Zd ZU eedZee ed< eedZ	ee
e  ed< eedZee ed< eedZee
eeef   ed< dS )LogProbsr)   text_offsettoken_logprobstokenstop_logprobsN)r1   r2   r3   r   r8   rA   r   r#   r6   rB   r   floatrC   r5   rD   r   r%   r%   r%   r&   r@   P   s
   
 &r@   c                   @   s*   e Zd ZU eed< ee ed< eed< dS )
TopLogprobtokenbyteslogprobN)r1   r2   r3   r5   r6   r   r#   rE   r%   r%   r%   r&   rF   W      
 rF   c                   @   s6   e Zd ZU eed< ee ed< eed< ee ed< dS )ChatCompletionTokenLogprobrG   rH   rI   rD   N)	r1   r2   r3   r5   r6   r   r#   rE   rF   r%   r%   r%   r&   rK   ]   s
   
 rK   c                   @   s   e Zd ZU ee ed< dS )ChoiceLogprobscontentN)r1   r2   r3   r   rK   r6   r%   r%   r%   r&   rL   d   s   
 rL   c                   @   s\   e Zd ZU dZdZeed< dZeed< dZe	e ed< dZ
e	e ed< edd	d
d ZdS )CachedTokensDetailsz4Detailed breakdown of cached tokens by cache source.r   devicehostNstoragestorage_backendwrapmodec                 C   s8   || }| j d u r|dd  | jd u r|dd  |S )NrQ   rR   )rQ   poprR   selfhandlerr9   r%   r%   r&   
_serializer   s   

zCachedTokensDetails._serialize)r1   r2   r3   r4   rO   r#   r6   rP   rQ   r   rR   r5   r   rZ   r%   r%   r%   r&   rN   i   s   
 rN   c                   @      e Zd ZU dZdZeed< dS )PromptTokensDetailszDetails about prompt tokens.r   cached_tokensNr1   r2   r3   r4   r]   r#   r6   r%   r%   r%   r&   r\   }      
 r\   c                   @   sV   e Zd ZU dZeed< dZeed< dZee ed< dZ	ee
 ed< dZee ed< dS )	UsageInfor   prompt_tokenstotal_tokenscompletion_tokensNprompt_tokens_detailsreasoning_tokens)r1   r2   r3   ra   r#   r6   rb   rc   r   rd   r\   re   r%   r%   r%   r&   r`      s   
 r`   c                   @   s.   e Zd ZU dZee ed< dZee ed< dS )StreamOptionsFinclude_usagecontinuous_usage_statsN)r1   r2   r3   rg   r   boolr6   rh   r%   r%   r%   r&   rf      s   
 rf   c                   @   sV   e Zd ZU eed< dZee ed< edddZee	ee
f  ed< dZee ed< dS )	JsonSchemaResponseFormatnameNdescriptionschemaaliasr   schema_Fstrict)r1   r2   r3   r5   r6   rl   r   r   rp   r   r   rq   ri   r%   r%   r%   r&   rj      s
   
  rj   c                   @   s*   e Zd ZU ed ed< dZee ed< dS )ResponseFormat)textjson_objectjson_schemar=   Nru   )r1   r2   r3   r   r6   ru   r   rj   r%   r%   r%   r&   rr      s   
 rr   c                   @   s>   e Zd ZU eed< edddZeeee	f  ed< eed< dS )StructuresResponseFormatbeginrm   Nrn   rp   end)
r1   r2   r3   r5   r6   r   rp   r   r   r   r%   r%   r%   r&   rv      s   
  rv   c                   @   s2   e Zd ZU ed ed< ee ed< ee ed< dS )!LegacyStructuralTagResponseFormatstructural_tagr=   
structurestriggersN)r1   r2   r3   r   r6   r   rv   r5   r%   r%   r%   r&   ry      s   
 ry   StructuralTagResponseFormatrz   ru   ToolCallConstraintc                   @   s"   e Zd ZU eed< dZeed< dS )FileRequestfilebatchpurposeN)r1   r2   r3   rH   r6   r   r5   r%   r%   r%   r&   r      s   
 r   c                   @   sB   e Zd ZU eed< dZeed< eed< eed< eed< eed< dS )	FileResponser   r   r   rH   
created_atfilenamer   N)r1   r2   r3   r5   r6   r   r#   r%   r%   r%   r&   r      s   
 r   c                   @   s*   e Zd ZU eed< dZeed< eed< dS )FileDeleteResponser   r   r   deletedN)r1   r2   r3   r5   r6   r   ri   r%   r%   r%   r&   r      rJ   r   c                   @   s6   e Zd ZU eed< eed< eed< dZee ed< dS )BatchRequestinput_file_idendpointcompletion_windowNmetadata)r1   r2   r3   r5   r6   r   r   dictr%   r%   r%   r&   r      s   
 r   c                   @   s  e Zd ZU eed< dZeed< eed< dZee ed< eed< eed< d	Z	eed
< dZ
ee ed< dZee ed< eed< dZee ed< dZee ed< dZee ed< dZee ed< dZee ed< dZee ed< dZee ed< dZee ed< dZee ed< dZee ed< dS )BatchResponser   r   r   r   Nerrorsr   r   
validatingstatusoutput_file_iderror_file_idr   in_progress_at
expires_atfinalizing_atcompleted_at	failed_at
expired_atcancelling_atcancelled_atrequest_countsr   )r1   r2   r3   r5   r6   r   r   r   r   r   r   r   r#   r   r   r   r   r   r   r   r   r   r   r%   r%   r%   r&   r      s*   
 r   c                   @   sv  e Zd ZU eeddZeed< ee	e
 e	e	e
  ee	e f ed< dZee
 ed< dZeed< d	Zeed
< dZeeeef  ed< dZee
 ed< dZe
ed< dZe
ed< d	Zeed< dZee
 ed< dZeeee	e f  ed< dZeed< dZee ed< dZee ed< dZeed< dZeed< dZee ed< dZ eed< dZ!eed< dZ"eed< dZ#e
ed< d	Z$eed < d!Z%e
ed"< dZ&ee ed#< dZ'ee ed$< dZ(ee ed%< dZ)eed&< dZ*ee	e
  ed'< dZ+eeee	e f  ed(< dZ,eed)< dZ-eed*< d+Z.eed,< dZ/eee	ee  ee f  ed-< dZ0ee ed.< dZ1eee2e3f  ed/< dZ4ee ed0< dZ5ee ed1< dZ6eee	e ef  ed2< dZ7eee	ee
  e
f  ed3< dZ8eee	e
 e
f  ed4< dZ9ee
 ed5< dZ:eee	e ef  ed6< dZ;eee	e ef  ed7< dZ<eee	e ef  ed8< dZ=ee
 ed9< dZ>eeeef  ed:< e?de@d;d< ZAdS )=CompletionRequestHModel name. Supports LoRA adapters via 'base-model:adapter-name' syntax.r   rl   r   promptNbest_ofFecho        frequency_penalty
logit_biaslogprobs   
max_tokens   npresence_penaltyseedstopstreamstream_optionssuffix      ?temperaturetop_puserreturn_hidden_statesreturn_routed_expertsreturn_cached_tokens_detailstop_kmin_pr   
min_tokensru   regexebnfrepetition_penaltystop_token_ids
stop_regexno_stop_trim
ignore_eosTskip_special_tokens	lora_pathsession_paramsresponse_formatcustom_paramscustom_logit_processorbootstrap_hostbootstrap_portbootstrap_roomdata_parallel_rankrid	extra_key
cache_saltprioritycustom_labelsc                 C   s   |d ur|dkrt d|S )Nr   zmax_tokens must be positive
ValueErrorclsvr%   r%   r&   validate_max_tokens_positive/     z.CompletionRequest.validate_max_tokens_positive)Br1   r2   r3   r   DEFAULT_MODEL_NAMEr   r5   r6   r
   r   r#   r   r   r   ri   r   rE   r   r   r   r   r   r   r   r   r   r   rf   r   r   r   r   r   r   r   r   r   r   ru   r   r   r   r   r   r   r   r   r   r   r   rr   r}   r   r   r   r   r   r   r   r   r   r   r   r   classmethodr   r%   r%   r%   r&   r      sl   
 $$ r   c                   @   sD   e Zd ZU dZdZee ed< dZee	 ed< e
dddd ZdS )	SglExtzSGLang extension fields for OpenAI-compatible responses.

    Future SGLang-specific extensions to OpenAI-compatible response objects
    should be added as fields here rather than directly on the choice object.
    Nrouted_expertscached_tokens_detailsrS   rT   c                 C   s   || }dd |  D S )Nc                 S   s   i | ]\}}|d ur||qS r!   r%   ).0kr   r%   r%   r&   
<dictcomp>E  s    z%SglExt._serialize.<locals>.<dictcomp>)itemsrW   r%   r%   r&   rZ   A  s   zSglExt._serialize)r1   r2   r3   r4   r   r   r5   r6   r   rN   r   rZ   r%   r%   r%   r&   r   7  s   
 r   c                   @   z   e Zd ZU eed< eed< dZee ed< dZ	ee
d  ed< dZedeef ed< dZee ed< ed	d
dd ZdS )CompletionResponseChoiceindexrs   Nr   r   lengthcontent_filterabortfinish_reasonmatched_stophidden_statesrS   rT   c                 C   "   || }| j d u r|dd  |S Nr   r   rV   rW   r%   r%   r&   rZ   P     
z#CompletionResponseChoice._serializer1   r2   r3   r#   r6   r5   r   r   r@   r   r   r   r
   r   r   r   rZ   r%   r%   r%   r&   r   H     
 r   c                   @      e Zd ZU eed< dZeed< edd dZeed< eed< e	e
 ed	< eed
< dZeeeef  ed< dZee ed< edddd ZdS )CompletionResponser   text_completionr   c                   C   r    r!   r"   r%   r%   r%   r&   r'   [  r(   zCompletionResponse.<lambda>r)   r+   r   choicesusageNr   sglextrS   rT   c                 C   r   Nr   r   rV   rW   r%   r%   r&   rZ   b  r   zCompletionResponse._serialize)r1   r2   r3   r5   r6   r   r   r+   r#   r   r   r`   r   r   r   r   r   r   r   rZ   r%   r%   r%   r&   r   X     
 r   c                   @   r   )CompletionResponseStreamChoicer   rs   Nr   r   r   r   r   rS   rT   c                 C   r   r   r   rW   r%   r%   r&   rZ   r  r   z)CompletionResponseStreamChoice._serializer   r%   r%   r%   r&   r   j  r   r   c                   @   ~   e Zd ZU eed< dZeed< edd dZeed< eed< e	e
 ed	< d
Zee ed< d
Zee ed< edddd Zd
S )CompletionStreamResponser   r   r   c                   C   r    r!   r"   r%   r%   r%   r&   r'   }  r(   z!CompletionStreamResponse.<lambda>r)   r+   r   r   Nr   r   rS   rT   c                 C   r   r   r   rW   r%   r%   r&   rZ     r   z#CompletionStreamResponse._serialize)r1   r2   r3   r5   r6   r   r   r+   r#   r   r   r   r   r`   r   r   r   rZ   r%   r%   r%   r&   r   z     
 r   c                   @   "   e Zd ZU ed ed< eed< dS )$ChatCompletionMessageContentTextPartrs   r=   N)r1   r2   r3   r   r6   r5   r%   r%   r%   r&   r        
 r   c                   @   sJ   e Zd ZU eed< dZeed  ed< dZee	 ed< dZ
ee	 ed< dS )$ChatCompletionMessageContentImageURLurlauto)r  lowhighdetailNmax_dynamic_patchmin_dynamic_patch)r1   r2   r3   r5   r6   r  r   r   r  r#   r	  r%   r%   r%   r&   r    s
   
 r  c                   @   s6   e Zd ZU eed< dZee ed< dZee ed< dS )$ChatCompletionMessageContentVideoURLr  Nr  r	  )	r1   r2   r3   r5   r6   r  r   r#   r	  r%   r%   r%   r&   r
    s   
 r
  c                   @   s   e Zd ZU eed< dS )$ChatCompletionMessageContentAudioURLr  N)r1   r2   r3   r5   r6   r%   r%   r%   r&   r    s   
 r  c                   @   s6   e Zd ZU ed ed< eed< dZeed  ed< dS )%ChatCompletionMessageContentImagePart	image_urlr=   image)r  zmulti-imagesvideo
modalitiesN)r1   r2   r3   r   r6   r  r  r   r%   r%   r%   r&   r    s   
 r  c                   @   r   )%ChatCompletionMessageContentVideoPart	video_urlr=   N)r1   r2   r3   r   r6   r
  r%   r%   r%   r&   r    r  r  c                   @   r   )%ChatCompletionMessageContentAudioPart	audio_urlr=   N)r1   r2   r3   r   r6   r  r%   r%   r%   r&   r    r  r  c                   @   s>   e Zd ZU dZdZee ed< dZeee	ee
f B  ed< dS )FunctionResponsezFunction response.Nrk   	arguments)r1   r2   r3   r4   rk   r   r5   r6   r  r   r   r%   r%   r%   r&   r    s   
  r  c                   @   sJ   e Zd ZU dZdZee ed< dZee	 ed< dZ
ed ed< eed< dS )ToolCallzTool call response.Nr   r   functionr=   )r1   r2   r3   r4   r   r   r5   r6   r   r#   r=   r   r  r%   r%   r%   r&   r    s   
 r  c                   @   s   e Zd ZU ed ed< eddZeee	e
 df ed< dZee ed< dZee ed< dZee ed< eddgd	Zee	e  ed
< eddgd	Zee	e  ed< edddedd ZdS )!ChatCompletionMessageGenericParam)system	assistanttoolr  	developerroleN)r   rM   tool_call_idrk   reasoning_contentr   examples
tool_callstoolsbeforerT   c                 C   s.   t |tr| }|dvrtd|S td)N>   r  r  r  r  r  zc'role' must be one of 'system', 'developer', 'assistant', 'tool', or 'function' (case-insensitive).z'role' must be a string)
isinstancer5   lowerr   )r   r   v_lowerr%   r%   r&   _normalize_role  s   
z1ChatCompletionMessageGenericParam._normalize_role)r1   r2   r3   r   r6   r   rM   r
   r5   r    ChatCompletionMessageContentPartr  r   rk   r   r#  r  r$  r   r   r   r)  r%   r%   r%   r&   r    s   
 
r  c                   @   s.   e Zd ZU ed ed< eeee f ed< dS )ChatCompletionMessageUserParamr   r  rM   N)	r1   r2   r3   r   r6   r
   r5   r   r*  r%   r%   r%   r&   r+    s   
 r+  c                   @   sP   e Zd ZU dZeddgdZee ed< eed< dZ	ee
 ed< dZeed< dS )	FunctionzFunction descriptions.Nr!  rl   rk   
parametersFrq   )r1   r2   r3   r4   r   rl   r   r5   r6   r-  r   rq   ri   r%   r%   r%   r&   r,    s   
 r,  c                   @   s0   e Zd ZU dZeddgdZeed< eed< dS )r   zFunction wrapper.r  r!  r=   N)	r1   r2   r3   r4   r   r=   r5   r6   r,  r%   r%   r%   r&   r     s   
 r   c                   @   s"   e Zd ZU dZdZee ed< dS )ToolChoiceFuncNamez!The name of tool choice function.Nrk   )r1   r2   r3   r4   rk   r   r5   r6   r%   r%   r%   r&   r.    s   
 r.  c                   @   s4   e Zd ZU dZeed< eddgdZed ed< dS )r   zThe tool choice definition.r  r!  r=   N)	r1   r2   r3   r4   r.  r6   r   r=   r   r%   r%   r%   r&   r   
  s   
 r   c                   @   s  e Zd ZU ee ed< eeddZe	ed< dZ
eed< dZeee	ef  ed< d	Zeed
< dZee ed< eddddZee ed< edddZee ed< dZeed< dZeed< dZeeeef  ed< dZee ed< dZeee	ee	 f  ed< d	Zeed< dZee ed< dZ ee ed< dZ!ee ed< dZ"ee	 ed< eddgdZ#eee$  ed< edd gdZ%ee&e'd! f ed"< d	Z(eed#< d	Z)eed$< d	Z*eed%< ed&d'dZ+ee'd(  ed)< dZ,ee ed*< dZ-ee ed+< d,Z.eed-< dZ/ee	 ed.< dZ0ee	 ed/< dZ1ee ed0< dZ2eee  ed1< dZ3eee	ee	 f  ed2< d	Z4eed3< d	Z5eed4< d	Z6eed5< d6Z7eed7< dZ8eeeee	  ee	 f  ed8< dZ9ee ed9< d6Z:eed:< d6Z;eed;< dZ<ee ed<< dZ=ee ed=< dZ>ee ed>< dZ?eeeee	  e	f  ed?< dZ@ee ed@< dZAeeee	 e	f  edA< dZBeeee	 e	f  edB< dZCeeee	 e	f  edC< dZDee edD< dZEeeee	 e	f  edE< dZFeeeee  ef  edF< dZGeeee ef  edG< dZHee edH< dIdIdJddIdKZIeJdLdMeKdNdO ZLeJdLdMeKdPefdQdRZMeJdLdMeKdSdT ZN	dZdee	 dUee	eOf dVeeP dWee	eOf fdXdYZQdS )[ChatCompletionRequestmessagesr   r   r   r   r   Nr   Fr   rD   zDmax_tokens is deprecated in favor of the max_completion_tokens fieldzKThe maximum number of tokens that can be generated in the chat completion. )r   
deprecatedrl   r   zThe maximum number of completion tokens for a chat completion request, including visible output tokens and reasoning tokens. Input tokens are not included. max_completion_tokensr   r   r   r   r   r   r   r   r   r   r   r!  r$  r  noner  requiredr3  tool_choicer   r   r   mediuma)  Constrains effort on reasoning for reasoning models. 'low' is the least effort, 'high' is the most effort. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response. Currently only supported for OpenAI models in the harmony path, i.e GPT-OSS models.r  r7  r  reasoning_effortr   r   r   r   r   r   r   r   r   r   r   continue_final_messageTr   r   r   separate_reasoningstream_reasoningchat_template_kwargsr  r	  r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r%  rT   c                 C   s4   | dd u r| dd u rd|d< |S d|d< |S )Nr6  r$  r3  r  )get)r   valuesr%   r%   r&   set_tool_choice_defaultu  s   z-ChatCompletionRequest.set_tool_choice_defaultr@  c                 C   s   | d}|d u r|S t|trY| dp| d}|dv r"||d< | dd ur.| dn| dd}t|trA|  dv }|rY| d	}t|tsOi }|d
d ||d	< |S )N	reasoningeffortr9  >   r  r  r7  enabledenableF>   1yonyestruer=  thinkingT)r?  r&  r   r5   stripr'  
setdefault)r   r@  rrC  rD  ctkr%   r%   r&   normalize_reasoning_inputs  s(   






z0ChatCompletionRequest.normalize_reasoning_inputsc                 C   s   | d}|s	|S | ddkr|S |dd }| d}|r!|S |rO| dd}d}d|v rGd	|d v rG|d d	d }|rG| d
drGd}|||d|d< |S )Nr   r=   ru   rm   titleSchemaF
propertiesrq   r   T)rk   rm   rq   )r?  rV   )r   r@  r   rm   ru   name_strict_itemr%   r%   r&   set_json_schema  s*   


z%ChatCompletionRequest.set_json_schemamodel_generation_configtool_call_constraintreturnc           
         s  dt f fdd}jdu rdnjdd}i d|ddjp$jd	jd
|djdjd|dd|dd|ddjdj	d|ddj
djdjdjdjjjjj|d}jrjjdkrtjjj|d< n"jrjjdkrd|d< njrjjdkrtjjdd|d< |dp|dp|dp|d}|r|rtd |S |r|\}}	|dkrt|	jdd||< |S |dkrt|	||< |S |	||< |S )z
        Convert request to sampling parameters.
        Priority: user value > model generation_config > OpenAI defaults
        
param_namec                    s(   t | }|d u r | j|  S |S r!   )getattrr?  _DEFAULT_SAMPLING_PARAMS)r[  valuerX  rX   r%   r&   	get_param  s   

z;ChatCompletionRequest.to_sampling_params.<locals>.get_paramNTspaces_between_special_tokensr   max_new_tokensmin_new_tokensr   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   sampling_seedra  ru   rt   z{"type": "object"}rz   )by_aliasz7Constrained decoding is not compatible with tool calls.)r5   r=  r?  r2  r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r=   r   ru   rp   
model_dumploggerwarning)
rX   r   rX  rY  r`  ra  sampling_paramshas_existing_constraintsconstraint_typeconstraint_valuer%   r_  r&   to_sampling_params  s   
	






z(ChatCompletionRequest.to_sampling_paramsr!   )Rr1   r2   r3   r   ChatCompletionMessageParamr6   r   r   r   r5   r   rE   r   r   r   r   ri   rD   r#   r   r2  r   r   r   r
   rr   r}   r   r   r   r   rf   r   r   r   r$  r   r6  r   r   r   r   r   r9  r   r   r   r   r   r   r   r   r   r   r:  r   r   r   r;  r<  r=  r  r	  r   r   r   r   r   r   r   r   r   r   r]  r   r   rA  rP  rW  r   r~   rm  r%   r%   r%   r&   r/    s   
 	$  "

r/  c                   @   s\   e Zd ZU dZee ed< dZee ed< dZee ed< e	ddgdZ
eee  ed< dS )ChatMessageNr  rM   r   r!  r#  )r1   r2   r3   r  r   r5   r6   rM   r   r   r#  r   r  r%   r%   r%   r&   ro    s
   
 "ro  c                   @   s   e Zd ZU eed< eed< dZeee	e
f  ed< dZeed  ed< dZedeef ed< dZee ed< ed	d
dd ZdS )ChatCompletionResponseChoicer   r<   Nr   r   r   r#  r   function_callr   r   r   r   rS   rT   c                 C   r   r   r   rW   r%   r%   r&   rZ   &  r   z'ChatCompletionResponseChoice._serialize)r1   r2   r3   r#   r6   ro  r   r   r
   r@   rL   r   r   r   r5   r   r   r   rZ   r%   r%   r%   r&   rp    s   
 rp  c                   @   r   )ChatCompletionResponser   zchat.completionr   c                   C   r    r!   r"   r%   r%   r%   r&   r'   1  r(   zChatCompletionResponse.<lambda>r)   r+   r   r   r   Nr   r   rS   rT   c                 C   r   r   r   rW   r%   r%   r&   rZ   8  r   z!ChatCompletionResponse._serialize)r1   r2   r3   r5   r6   r   r   r+   r#   r   rp  r`   r   r   r   r   r   r   r   rZ   r%   r%   r%   r&   rs  .  r   rs  c                   @   s~   e Zd ZU dZee ed< dZee ed< dZee ed< e	ddgdZ
eee  ed< dZee ed< edd	d
d ZdS )DeltaMessageNr  rM   r   r!  r#  r   rS   rT   c                 C   r   r   r   rW   r%   r%   r&   rZ   G  r   zDeltaMessage._serialize)r1   r2   r3   r  r   r5   r6   rM   r   r   r#  r   r  r   r   r   rZ   r%   r%   r%   r&   rt  @  s   
 rt  c                   @   s`   e Zd ZU eed< eed< dZeee	e
f  ed< dZeed  ed< dZedeef ed< dS )"ChatCompletionResponseStreamChoicer   deltaNr   rq  r   r   )r1   r2   r3   r#   r6   rt  r   r   r
   r@   rL   r   r   r   r5   r%   r%   r%   r&   ru  O  s   
 ru  c                   @   r   )ChatCompletionStreamResponser   zchat.completion.chunkr   c                   C   r    r!   r"   r%   r%   r%   r&   r'   ^  r(   z%ChatCompletionStreamResponse.<lambda>r)   r+   r   r   Nr   r   rS   rT   c                 C   r   r   r   rW   r%   r%   r&   rZ   d  r   z'ChatCompletionStreamResponse._serialize)r1   r2   r3   r5   r6   r   r   r+   r#   r   ru  r   r   r`   r   r   r   rZ   r%   r%   r%   r&   rw  [  r   rw  c                   @   s>   e Zd ZU dZee ed< dZee ed< dZee ed< dS )MultimodalEmbeddingInputNrs   r  r  )	r1   r2   r3   rs   r   r5   r6   r  r  r%   r%   r%   r&   rx  l  s   
 rx  c                   @   s   e Zd ZU eed< eZeed< dZeed< dZ	e
e ed< dZe
e ed< dZe
eee ef  ed< dZe
e ed	< dZe
eee
e  e
e f  ed
< dS )EmbeddingRequestinputr   rE   encoding_formatN
dimensionsr   r   r   r   )r1   r2   r3   EmbeddingInputr6   r   r   r5   r{  r|  r   r#   r   r   r
   r   r   r   r%   r%   r%   r&   ry  w  s   
 (ry  c                   @   s.   e Zd ZU ee ed< eed< dZeed< dS )EmbeddingObject	embeddingr   r   N)	r1   r2   r3   r   rE   r6   r#   r   r5   r%   r%   r%   r&   r~    s   
 r~  c                   @   s^   e Zd ZU eZeed< eed< dZe	e ed< dZ
e	eee ef  ed< dZe	e ed< dS )ClassifyRequestr   rz  Nr   r   r   )r1   r2   r3   r   r   r5   r6   ClassifyInputr   r   r   r
   r   r   r#   r%   r%   r%   r&   r    s   
 r  c                   @   s2   e Zd ZU eed< eed< ee ed< eed< dS )ClassifyDatar   labelprobsnum_classesN)r1   r2   r3   r#   r6   r5   r   rE   r%   r%   r%   r&   r    s
   
 r  c                   @   sF   e Zd ZU eed< dZeed< eed< eed< ee ed< e	ed< dS )	ClassifyResponser   r8   r   r+   r   r9   r   N)
r1   r2   r3   r5   r6   r   r#   r   r  r`   r%   r%   r%   r&   r    s   
 r  c                   @   s>   e Zd ZU ee ed< eed< dZeed< dZe	e
 ed< dS )EmbeddingResponser9   r   r8   r   Nr   )r1   r2   r3   r   r~  r6   r5   r   r   r   r`   r%   r%   r%   r&   r    s
   
 r  c                   @   s   e Zd ZU dZeeeee f  e	d< dZ
eeeee eee  f  e	d< dZeee  e	d< dZee	d< dZee	d< eZee	d< dS )	ScoringRequestNqueryr   label_token_idsFapply_softmax
item_firstr   )r1   r2   r3   r  r   r
   r5   r   r#   r6   r   r  r  ri   r  r   r   r%   r%   r%   r&   r    s   
 $r  c                   @   sB   e Zd ZU eee  ed< eed< dZee	 ed< dZ
eed< dS )ScoringResponsescoresr   Nr   scoringr   )r1   r2   r3   r   rE   r6   r5   r   r   r`   r   r%   r%   r%   r&   r    s   
 r  c                   @   s   e Zd ZU edddZeed< edddZee ed< eddd	Z	e
e ed
< eddd	Ze
e ed< eddd	Zeed< ededd ZdefddZdS )V1RerankReqInput.zThe query to match against documents. Can be a string (text-only) or a list of content parts for multimodal queries (text, image_url, video_url).rl   r  zList of documents to rank. Each document can be a string (text-only) or a list of content parts for multimodal documents (text, image_url, video_url).	documentsNz#The instruct to the reranker model.r   instructzMaximum number of documents to return. Defaults to returning all documents. If specified value is greater than the total number of documents, all documents will be returned.top_nTzLWhether to return documents in the response. Only included when set to true.return_documentsc                 C   s   |d ur|dk rt d|S )Nr   z5Value error, parameter top_n should be larger than 0.r   r   r%   r%   r&   validate_top_n  r   zV1RerankReqInput.validate_top_nrZ  c                 C   s0   t | jtrdS | jD ]
}t |tr dS qdS )z5Check if the request contains any multimodal content.TF)r&  r  r8   r  )rX   docr%   r%   r&   is_multimodal  s   

zV1RerankReqInput.is_multimodal)r1   r2   r3   r   r  RerankContentr6   r  r   r  r   r5   r  r#   r  ri   r   r   r  r  r%   r%   r%   r&   r    s2   
 r  c                   @   sP   e Zd ZU eed< dZee ed< eed< dZ	ee
 ed< edddd	 ZdS )
RerankResponsescoreNdocumentr   	meta_inforS   rT   c                 C   r   )Nr  )r  rV   rW   r%   r%   r&   rZ     s   
zRerankResponse._serialize)r1   r2   r3   rE   r6   r  r   r5   r#   r  r   r   rZ   r%   r%   r%   r&   r    s   
 r  c                   @   sF   e Zd ZU dZeZeed< eee	e f ed< e
dddZeed< dS )	TokenizeRequestz*Request schema for the /tokenize endpoint.r   r   TzLwhether to add model-specific special tokens (e.g. BOS/EOS) during encoding.r   add_special_tokensN)r1   r2   r3   r4   r   r   r5   r6   r
   r   r   r  ri   r%   r%   r%   r&   r    s   
 r  c                   @   sJ   e Zd ZU dZeee eee  f ed< eeee f ed< eed< dS )TokenizeResponsez+Response schema for the /tokenize endpoint.rC   countr0   N)r1   r2   r3   r4   r
   r   r#   r6   r%   r%   r%   r&   r    s
   
 r  c                   @   sN   e Zd ZU dZeZeed< ee	e
 e	e	e
  f ed< edddZeed< dS )	DetokenizeRequestz,Request schema for the /detokenize endpoint.r   rC   TzHwhether to exclude special tokens (e.g. padding or EOS) during decoding.r   r   N)r1   r2   r3   r4   r   r   r5   r6   r
   r   r#   r   r   ri   r%   r%   r%   r&   r    s   
 r  c                   @   s&   e Zd ZU dZeeee f ed< dS )DetokenizeResponsez-Response schema for the /detokenize endpoint.rs   N)r1   r2   r3   r4   r
   r5   r   r6   r%   r%   r%   r&   r  #  s   
 r  c                   @   s.   e Zd ZU dZedddZeed  ed< dS )ResponseReasoningParamz#Reasoning parameters for responses.r7  z4Constrains effort on reasoning for reasoning models.r   r8  rC  N)	r1   r2   r3   r4   r   rC  r   r   r6   r%   r%   r%   r&   r  6  s   
 r  c                   @   s(   e Zd ZU dZeddZed ed< dS )ResponseToolzTool definition for responses.zType of tool to enabler  )web_search_previewcode_interpreterr=   N)r1   r2   r3   r4   r   r=   r   r6   r%   r%   r%   r&   r  ?  s
   
 r  r   ResponseInputOutputItemc                	   @   sj  e Zd ZU dZdZee ed< dZee	e
d   ed< eee	e f ed< dZee ed< dZee ed	< dZee ed
< dZeeeef  ed< dZee ed< dZee ed< dZee ed< dZee ed< dZe
d ed< dZee ed< dZee ed< dZee ed< dZe
d ed< e e!dZ"e	e# ed< dZ$ee ed< dZ%ee ed< dZ&ee
d  ed < dZ'ee ed!< e d"d# d$d%Z(eed&< e dd'd(Z)eed)< e dd*d(Z*ee ed+< e dd,d(Z+ee ed-< d.Z,eed/< d.Z-eed0< dZ.eeee	e f  ed1< d2Z/eed3< d.Z0eed4< d5Z1eed6< d7d5d2d.d5d8Z2	d>d9ed:ee d;eeef fd<d=Z3dS )?ResponsesRequestz'Request body for v1/responses endpoint.F
backgroundN)zcode_interpreter_call.outputsz%computer_call_output.output.image_urlzfile_search_call.resultszmessage.input_image.image_urlzmessage.output_text.logprobszreasoning.encrypted_contentincluderz  instructionsmax_output_tokensmax_tool_callsr   r   Tparallel_tool_callsprevious_response_idrB  r  )r  r   flexscaler   service_tierstorer   r   r4  r6  r)   r$  r   rD   r   disabled)r  r  
truncationr   c                   C   s   dt  j S Nresp_)uuiduuid4hexr%   r%   r%   r&   r'   u  s    zResponsesRequest.<lambda>zgThe request_id related to this request. If the caller does not set it, a random uuid will be generated.)r*   rl   
request_idzRequest priorityr   r   z7Extra key for classifying the request (e.g. cache_salt)r   zCache salt for request cachingr   r   r   r   r   r   r   r   r   r   gffffff?r>  default_max_tokensdefault_paramsrZ  c           	   
   C   s   |du ri }| j durt| j |}n|}|d8 }| j}|du r(|d| jd }| j}|du r8|d| jd }|||| j| j| j| j	| j
| jd	}| D ]\}}||vs\|| du r`|||< qN|S )z.Convert to sampling parameters for generation.N   r   r   )	rb  r   r   r   r   r   r   r   r   )r  minr   r?  r]  r   r   r   r   r   r   r   r   )	rX   r  r  r   r   r   paramskeyr^  r%   r%   r&   rm    s:   

z#ResponsesRequest.to_sampling_paramsr!   )4r1   r2   r3   r4   r  r   ri   r6   r  r   r   r
   r5   r  r  r  r#   r  r   r   r   r   r  r  rB  r  r  r  r   r   rE   r6  r   r8   r$  r  rD   r   r  r   r  r   r   r   r   r   r   r   r   r   r]  rm  r%   r%   r%   r&   r  N  s|   
 	
r  c                   @   r[   )PromptTokenUsageInfozPrompt token usage details.r   r]   Nr^   r%   r%   r%   r&   r    r_   r  c                   @   s  e Zd ZU dZedd dZeed< dZe	d ed< edd dZ
eed	< eed
< eedZeeeeef  ed< e	d ed< dZee ed< dZeed< dZeed< eedZee ed< dZee ed< dZee ed< dZee ed< dZee ed< dZ ee ed< dZ!ee ed< dZ"ee ed< dZ#ee$ ed< dZ%ee ed< dZ&ee$ ed< dZ'ee ed< dZ(ee ed < dZ)ee*ee+f  ed!< e,d"e-d#e+d$ed%edeeeeef  dedee d&d fd'd(Z.dS ))ResponsesResponsez(Response body for v1/responses endpoint.c                   C   s   dt    S r  )r$   r%   r%   r%   r&   r'     s    zResponsesResponse.<lambda>r)   r   responser   c                   C   r    r!   r"   r%   r%   r%   r&   r'     r(   r   r   output)queuedin_progress	completedfailed	cancelledr   Nr   Tr  r  r6  r$  r;   incomplete_detailsr  r  r  rB  r  r   rs   r   r  r   r   requestri  
model_namecreated_timerZ  c           
      C   s  dt ttttf  dtfdd}||rdddiind}	| d i d	|jd
|d|d|d|d|d|jp7dd|jd|j	ddddd|j
d|jd|jd|jrZ|jjndddd|jd|jd|	d|jd|jd|jd|jp~i S S )!z!Create a response from a request.r   rZ  c              	   S   s   | sdS | D ]B}t |tst |tr dS z(t |trW qt |tr8|js'W q|jD ]}t |ts6 W  dS q*nW  dS W q tyH   Y  dS w dS )NFT)r&  r   r   r   r   rM   AttributeError)r   itcr%   r%   r&   _is_text_only  s0   




z5ResponsesResponse.from_request.<locals>._is_text_onlyformatr=   rs   Nr   r   r   r  r   r   r  Tr6  r$  r;   r  r  r  r  rB  )rC  summaryr  r   r   r  r   r   r%   )r   r
   r   r   r   ri   r  r  r6  r$  r  r  r  rB  rC  r  r   r   r  r   r   )
r   r  ri  r  r  r  r   r   r  text_formatr%   r%   r&   from_request  sx   
	
zResponsesResponse.from_request)/r1   r2   r3   r4   r   r   r5   r6   r   r   r   r#   r8   r  r   r
   r   r   r   r   r   r`   r  ri   r6  r$  r  r;   r   r  r  r  r  rB  r  r   rE   rs   r   r  r   r   r   r   r   r  r  r%   r%   r%   r&   r    sb   
 	
r  c                   @   s*   e Zd ZU dZeed< dZee ed< dS )RequestResponseMetadataz'Metadata for request/response tracking.r  Nfinal_usage_info)	r1   r2   r3   r4   r5   r6   r  r   r`   r%   r%   r%   r&   r  ;  s   
 r  c                   @   sz   e Zd ZU dZeed< eeee f ed< e	e
 ed< e	e
 ed< e	e
 ed< ee ed< ee ed< d	Ze	e ed
< d	S )MessageProcessingResulta  Result of processing chat messages and applying templates.

    This dataclass encapsulates all the outputs from message processing including
    prompt generation, multimodal data extraction, and constraint preparation.
    Used internally by OpenAIServingChat to pass processed data between methods.

    Args:
        prompt: The final text prompt after applying chat template
        prompt_ids: Either the text prompt (str) or tokenized IDs (List[int])
        image_data: Extracted image data from messages, if any
        audio_data: Extracted audio data from messages, if any
        modalities: List of modality types present in the messages
        stop: Combined stop strings from template and request
        tool_call_constraint: Optional constraint for structured tool calls
    r   
prompt_ids
image_data
audio_data
video_datar  r   NrY  )r1   r2   r3   r4   r5   r6   r
   r   r#   r   r   rY  r~   r%   r%   r%   r&   r  B  s   
 r  c                   @   s:   e Zd ZU dZeee  ed< eed< e	eef ed< dS )ToolCallProcessingResultz.Result of processing tool calls in a response.r#  remaining_textr   N)
r1   r2   r3   r4   r   r   r   r6   r5   r   r%   r%   r%   r&   r  ^  s   
 r  c                   @   s&   e Zd ZU eed< dZed ed< dS )ResponseReasoningTextContentrs   reasoning_textr=   N)r1   r2   r3   r5   r6   r=   r   r%   r%   r%   r&   r  h  s   
 r  )}r4   loggingr$   r  dataclassesr   typingr   r   r   r   r   r   r	   r
   openai.types.responsesr   r   r   r   r   r   openai.types.responses.responser   openai.types.responses.toolr   pydanticr   r   r   r   r   typing_extensionsr   xgrammarr   sglang.utilsr   	getLoggerr1   rg  r   r   r7   r:   r@   rF   rK   rL   rN   r\   r`   rf   rj   rr   rv   ry   r}   r6   r~   r   r   r   r   r   r   r   r   r   r   r   r   r  r
  r  r  r  r  r*  RerankContentPartr5   r  r  r  r  r+  rn  r,  r.  r/  ro  rp  rs  rt  ru  rw  rx  r#   r}  ry  r~  r  r  r  r  r  r  r  r  r  r  r  r  r  OpenAIServingRequestr  r  r  r  r  r  r  r  r  r  r%   r%   r%   r&   <module>   s  ( 
			K			   		*	st
