o
    پi                    @  s&	  d Z ddlmZ ddlZddlZddlmZ ddlmZm	Z	 ddl
mZ ddlmZmZmZmZmZmZmZ ddlZddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ eraddlm Z  neZ eG dd deZ!eG dd deZ"eG dd dZ#eG dd dZ$eG dd dZ%e&e%j'( Z)eG dd dZ*ee e+eef Z,ee+ef Z-ee+ef Z.ee,e.e-f Z/eeee/  ee/ e/f Z0eG dd de!e%Z1eG dd de!Z2eG dd de"Z3eG d d! d!e!e%Z4eG d"d# d#e!Z5eG d$d% d%e"Z6eG d&d' d'e"e#e$Z7eG d(d) d)e"Z8eG d*d+ d+e"e#e$Z9eG d,d- d-e"Z:eG d.d/ d/e"e#Z;eG d0d1 d1e!Z<eG d2d3 d3e!Z=eG d4d5 d5e!Z>eG d6d7 d7e!Z?eG d8d9 d9e!Z@eG d:d; d;e!ZAeG d<d= d=e!ZBeG d>d? d?e!ZCeG d@dA dAe!ZDeG dBdC dCe!ZEeG dDdE dEe!ZFeG dFdG dGe!ZGeG dHdI dIe!ZHeG dJdK dKe!ZIeG dLdM dMe!ZJeG dNdO dOe!ZKeG dPdQ dQe!ZLeG dRdS dSe!ZMeG dTdU dUe!ZNeG dVdW dWe!ZOeG dXdY dYe!ZPeG dZd[ d[e!ZQeG d\d] d]e!ZReG d^d_ d_e!ZSeG d`da dae!ZTeG dbdc dce!ZUeG ddde dee!ZVeG dfdg dge!ZWeG dhdi die!ZXeG djdk dke!ZYeG dldm dme!ZZeG dndo doe!Z[eG dpdq dqe!Z\eG drds dse!Z]eG dtdu due!Z^eG dvdw dwe!Z_eG dxdy dye!Z`eG dzd{ d{e!ZaeG d|d} d}e!ZbeG d~d de!ZceG dd de!ZdeG dd de!ZeeG dd de!ZfeG dd de!ZgG dd deZheG dd de!ZieG dd de!ZjeG dd de!ZkeG dd de!ZleG dd de!ZmeG dd de!ZneG dd de!ZoeG dd de!ZpG dd deZqeG dd de!ZreG dd de!ZseG dd dZteG dd dZueG dd de!ZveG dd de!ZweG dd de!ZxeG dd de!ZyeG dd de!ZzeG dd de!Z{eG dd de!Z|eG dd de!Z}eG dd de!Z~e~ Z ZZG dd deZeG dd de!ZeG dd de!ZeG dd de!ZeG dd dZeG dd dZeG ddÄ dÃZeG ddń dŃZeG ddǄ dǃZeG ddɄ de!ZeG dd˄ de!ZeG dd̈́ de!ZeG ddτ de!ZeG ddф de!ZeG ddӄ de!ZeG ddՄ de!ZeG ddׄ de!ZeG ddل de!Zddۄ Ze  dS )zv
The definition of objects transferred between different
processes (TokenizerManager, DetokenizerManager, Scheduler).
    )annotationsN)ABC)	dataclassfield)Enum)TYPE_CHECKINGAnyDictListLiteralOptionalUnion)LoRARef)BaseFinishReason)has_valid_data)SamplingParams)	ImageData)Imagec                   @  s>   e Zd ZU edddZded< edddZded< dd	 ZdS )
BaseReqNTdefaultkw_onlyzOptional[Union[str, List[str]]]ridOptional[str]http_worker_ipcc                 C  s>   t | jtrdd tt| jD | _| jS t j| _| jS )z(Generate a new request ID and return it.c                 S     g | ]}t  jqS  uuiduuid4hex.0_r   r   Q/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/managers/io_struct.py
<listcomp>3       z*BaseReq.regenerate_rid.<locals>.<listcomp>)
isinstancer   listrangelenr   r   r    selfr   r   r$   regenerate_rid0   s
   zBaseReq.regenerate_rid)__name__
__module____qualname__r   r   __annotations__r   r-   r   r   r   r$   r   +      
 r   c                   @  s>   e Zd ZU edddZded< edddZded< dd ZdS )	BaseBatchReqNTr   Optional[List[str]]ridshttp_worker_ipcsc                 C  s    dd t t| jD | _| jS )z)Generate new request IDs and return them.c                 S  r   r   r   r!   r   r   r$   r%   @   r&   z0BaseBatchReq.regenerate_rids.<locals>.<listcomp>)r)   r*   r5   r+   r   r   r$   regenerate_rids>   s   zBaseBatchReq.regenerate_rids)r.   r/   r0   r   r5   r1   r6   r7   r   r   r   r$   r3   9   r2   r3   c                   @  s:   e Zd ZU dZded< ded< ded< ded< ded< dS )	RequestTimingMetricsMixinz
    Mixin class containing common request-level timing metrics.

    This class consolidates the timing metrics that are shared across all batch output types
    to avoid code duplication and ensure consistency.
    zOptional[List[Optional[float]]]
queue_timeforward_entry_timeprefill_launch_delayprefill_launch_latencyprefill_finished_tsNr.   r/   r0   __doc__r1   r   r   r   r$   r8   D   s   
 
r8   c                   @  s*   e Zd ZU dZded< ded< ded< dS )SpeculativeDecodingMetricsMixinz
    Mixin class containing speculative decoding metrics.

    This class consolidates speculative decoding metrics that are shared across
    batch output types that support speculative decoding to avoid code duplication.
    	List[int]spec_verify_ctspec_accepted_tokensList[List[int]]spec_acceptance_histogramNr>   r   r   r   r$   r@   i   s
   
 r@   c                   @  s2   e Zd ZU dZded< dZded< dZded< dS )APIServingTimingMixinNOptional[float]validation_timereceived_timereceived_time_perf)r.   r/   r0   rH   r1   rI   rJ   r   r   r   r$   rF      s   
 rF   c                   @  sJ   e Zd ZU dZded< dZded< dZded< dZded< dZded	< dS )
SessionParamsNr   idr   Optional[int]offsetOptional[bool]replacedrop_previous_output)	r.   r/   r0   rL   r1   r   rN   rP   rQ   r   r   r   r$   rK      s   
 rK   c                   @  s  e Zd ZU dZded< dZded< dZded< dZded	< dZded
< dZ	ded< dZ
ded< dZded< dZded< dZded< dZded< dZded< dZded< dZded< dZded< dZded< dZded< dZd ed!< dZded"< dZd#ed$< dZd#ed%< dZd&ed'< dZded(< dZd)ed*< dZded+< dZded,< dZd)ed-< dZded.< dZ d/ed0< dZ!ded1< dZ"d2ed3< dZ#d/ed4< dZ$ded5< dZ%d2ed6< dZ&ded7< dZ'd8ed9< dZ(ded:< dZ)ded;< dZ*d<ed=< dZ+d>ed?< dZ,d@edA< dZ-d/edB< dZ.d/edC< dZ/d/edD< dZ0d/edE< dmdGdHZ1dIdJ Z2dKdL Z3dMdN Z4dOdP Z5dQdR Z6dSdT Z7dUdV Z8dWdX Z9dYdZ Z:d[d\ Z;d]d^ Z<d_d` Z=dadb Z>dcdd Z?dedf Z@dgdh ZAdidj ZBdkdl ZCdS )nGenerateReqInputNzOptional[Union[List[str], str]]text+Optional[Union[List[List[int]], List[int]]]	input_ids;Optional[Union[List[List[List[float]]], List[List[float]]]]input_embeds#Optional[MultimodalDataInputFormat]
image_data
video_data
audio_data!Optional[Union[List[Dict], Dict]]sampling_paramsz!Optional[Union[List[bool], bool]]return_logprobzOptional[Union[List[int], int]]logprob_start_lentop_logprobs_numtoken_ids_logprobFboolreturn_text_in_logprobsstreamTlog_metricszUnion[List[bool], bool]return_hidden_statesreturn_routed_expertsr   introuted_experts_start_lenr4   
modalitiessession_params3Optional[Union[List[Optional[str]], Optional[str]]]	lora_pathlora_idz)Optional[Union[List[Optional[str]], str]]custom_logit_processorbootstrap_hostz)Optional[Union[List[Optional[int]], int]]bootstrap_portbootstrap_roombootstrap_pair_keydecode_tp_sizerequire_reasoningrM   data_parallel_rank
backgroundr   conversation_idpriority	extra_keyrouting_keyno_logszOptional[Dict[str, str]]custom_labelsreturn_bytesreturn_entropyOptional[Dict]external_trace_headerrO   need_wait_for_imageOptional[List]num_items_assignedmax_dynamic_patchmin_dynamic_patchimage_max_dynamic_patchvideo_max_dynamic_patchreturnc                 C     t | jpt | jpt | jS Nr   rY   rZ   r[   r+   r   r   r$   contains_mm_input  
   
z"GenerateReqInput.contains_mm_inputc                 C  s6   |    |   |   | jr|   dS |   dS )a	  
        Normalize the batch size and arguments for the request.

        This method resolves various input formats and ensures all parameters
        are properly formatted as either single values or batches depending on the input.
        It also handles parallel sampling expansion and sets default values for
        unspecified parameters.

        Raises:
            ValueError: If inputs are not properly specified (e.g., none or all of
                       text, input_ids, input_embeds are provided)
        N)_validate_inputs_determine_batch_size_handle_parallel_sampling	is_single_normalize_single_inputs_normalize_batch_inputsr+   r   r   r$   normalize_batch_and_arguments!  s   z.GenerateReqInput.normalize_batch_and_argumentsc                 C  sP   | j du r| jdu r| jdu s| j dur"| jdur$| jdur&tddS dS dS )z/Validate that the input configuration is valid.Nz:Either text, input_ids or input_embeds should be provided.)rS   rU   rW   
ValueErrorr+   r   r   r$   r   7  s   


z!GenerateReqInput._validate_inputsc                 C  s   | j dur t| j trd| _d| _n	d| _t| j | _d| _dS | jdurMt| jdkr0tdt| jd t	r?d| _d| _n	d| _t| j| _d| _dS t| jd d t
r_d| _d| _dS d| _t| j| _dS )zDDetermine if this is a single example or a batch and the batch size.NT   Fr   zinput_ids cannot be empty.)rS   r'   strr   
batch_sizer*   rW   rU   r   rh   floatr+   r   r   r$   r   D  s*   




z&GenerateReqInput._determine_batch_sizec                 C  s   | j du r
d| _dS t| j tr| j dd| _n| j d dd| _| j D ]}| j|ddkr5tdq&| jdkra| jrcd| _| jdurK| jg| _| jdurU| jg| _| j	dure| j	g| _	dS dS dS dS )zDHandle parallel sampling parameters and adjust batch size if needed.Nr   nr   zLThe parallel_sample_num should be the same for all samples in sample params.F)
r]   parallel_sample_numr'   dictgetr   r   rS   rU   rW   )r,   r]   r   r   r$   r   `  s,   






z*GenerateReqInput._handle_parallel_samplingc                 C  sj   | j du ri | _ | jdu rt j| _| jdu rd| _| jdu r#d| _| jdu r+d| _| js3d| _dS dS )z&Normalize inputs for a single example.NFr   )	r]   r   r   r   r    r^   r_   r`   ra   r+   r   r   r$   r   z  s   





z)GenerateReqInput._normalize_single_inputsc                 C  s   | j dkr	| j}n| j| j  }| | | | | | | | | | | | | | | 	| | 
| | | dS )zPNormalize inputs for a batch of examples, including parallel sampling expansion.r   N)r   r   _expand_inputs_normalize_rid_normalize_lora_paths_normalize_image_data_normalize_video_data_normalize_audio_data_normalize_sampling_params_normalize_logprob_params!_normalize_custom_logit_processor_normalize_bootstrap_paramsr,   numr   r   r$   r     s   









z(GenerateReqInput._normalize_batch_inputsc                 C  s   | j durt| j tstd| j | j | _ dS | jdur8t| jtr+t| jd ts/td| j| j | _dS | jdurPt| jtsGtd| j| j | _dS dS )zMExpand the main inputs (text, input_ids, input_embeds) for parallel sampling.Nz+Text should be a list for batch processing.r   z9input_ids should be a list of lists for batch processing.z3input_embeds should be a list for batch processing.)rS   r'   r(   r   r   rU   rW   r   r   r   r$   r     s"   



zGenerateReqInput._expand_inputsc                 C  sR   | j dur't| j tr| j g| | _ dS t| j tr#| j | j | _ dS tddS )*Normalize LoRA paths for batch processing.N'lora_path should be a list or a string.)rm   r'   r   r(   r   r   r   r   r   r$   r     s   
z&GenerateReqInput._normalize_lora_pathsc                 C  s  | j du rdg| | _ dS t| j ts#| j gg| | _ dg| | _dS t| j trt| j dkr8dg| | _ dS t| j | jkrDtdg | _t| j dkrt| j d trtt| j D ]>}| j | du sn| j | dgkru| jd q]t| j | dkr| jd q]t| j | dkr| jd q]| jd q]| j | j	 | _ | j| j	 | _dS dd | j D }|| j	 | _ dg| | _dS dS )	z*Normalize image data for batch processing.Nimager   z;The length of image_data should be equal to the batch size.r   zmulti-imagesc                 S  s   g | ]}|gqS r   r   )r"   imgr   r   r$   r%     s    z:GenerateReqInput._normalize_image_data.<locals>.<listcomp>)
rY   r'   r(   rj   r*   r   r   r)   appendr   )r,   r   iwrapped_imagesr   r   r$   r     s:   
z&GenerateReqInput._normalize_image_datac                 C  Z   | j du rdg| | _ dS t| j ts| j g| | _ dS t| j tr+| j | j | _ dS dS )z*Normalize video data for batch processing.N)rZ   r'   r(   r   r   r   r   r$   r        
z&GenerateReqInput._normalize_video_datac                 C  r   )z*Normalize audio data for batch processing.N)r[   r'   r(   r   r   r   r   r$   r     r   z&GenerateReqInput._normalize_audio_datac                 C  sJ   | j du ri g| | _ dS t| j tr| j g| | _ dS | j | j | _ dS )z3Normalize sampling parameters for batch processing.N)r]   r'   r   r   r   r   r   r$   r     s
   
z+GenerateReqInput._normalize_sampling_paramsc                   s~    j du rdd t|D  _ dS t j tr' fddt|D }| _ dS t j tr;t j  jkr9tddS td)z+Normalize request IDs for batch processing.Nc                 S  r   r   r   r!   r   r   r$   r%     r&   z3GenerateReqInput._normalize_rid.<locals>.<listcomp>c                   s   g | ]
} j  d | qS )r#   r   r"   r   r+   r   r$   r%     s    zLThe specified rids length mismatch with the batch_size for batch processing.z0The rid should be a string or a list of strings.)r   r)   r'   r   r(   r*   r   r   )r,   r   new_ridsr   r+   r$   r      s   

zGenerateReqInput._normalize_ridc                   s    fdd}|j dd_ |jdd_|jdd_js*d	g  _d	S tjts>fd
dt D _d	S tjd tsTfddt D _d	S jdkr]tdd	S )z:Normalize logprob-related parameters for batch processing.c                   sD   | d u r	|g  S t | ts| g  S jdkr td| d| S )Nr   zCannot use list z with parallel_sample_num > 1)r'   r(   r   r   )paramdefault_value
param_namer   r,   r   r$   normalize_param  s   




zCGenerateReqInput._normalize_logprob_params.<locals>.normalize_paramFr^   r   r_   r   r`   Nc                   s   g | ]} j gqS r   )ra   r!   r+   r   r$   r%   0  s    z>GenerateReqInput._normalize_logprob_params.<locals>.<listcomp>c                   s   g | ]}t  jqS r   )copydeepcopyra   r!   r+   r   r$   r%   2  s    r   z>Cannot use list token_ids_logprob with parallel_sample_num > 1)	r^   r_   r`   ra   r'   r(   r)   r   r   )r,   r   r   r   r   r$   r     s.   

z*GenerateReqInput._normalize_logprob_paramsc                 C  sN   | j du rdg| | _ dS t| j ts| j g| | _ dS | jdkr%tddS )z6Normalize custom logit processor for batch processing.Nr   zCCannot use list custom_logit_processor with parallel_sample_num > 1)ro   r'   r(   r   r   r   r   r   r$   r   :  s   

z2GenerateReqInput._normalize_custom_logit_processorc                   sN   j du rdg|  _ nt j ts j g|  _ nt j tr' j  j  _  jdu r3dg|  _nt jtsA jg|  _nt jtrN j j  _ jdu rZdg|  _n t jtsm fddt|D  _nt jtrz j j  _ jdu rdg|  _dS t jts jg|  _dS t jtr j j  _dS dS )z4Normalize bootstrap parameters for batch processing.Nc                   s   g | ]} j | qS r   )rr   r   r+   r   r$   r%   [  r&   z@GenerateReqInput._normalize_bootstrap_params.<locals>.<listcomp>)rp   r'   r(   r   rq   rr   r)   rs   r   r   r+   r$   r   E  s2   



z,GenerateReqInput._normalize_bootstrap_paramsc                 C  s:   | j durt| j tst| j d tstddS dS dS )z8Validate that session parameters are properly formatted.Nr   z1Session params must be a dict or a list of dicts.)rk   r'   r   r   r+   r   r   r$   _validate_session_paramsg  s   

z)GenerateReqInput._validate_session_paramsc                   s  t d(i d jd ur j| nd d jd ur j| nd d jd ur( j| nd d j| d j| d j| d j| d j| d	 j	| d
 j
| d j| d j| d jd jd jdt jtrx j| n jd jd jr j| nd d jd jd ur j| nd d jd ur j| nd d jd ur j| nd d jd ur j| nd d jd ur͈ j| nd d jd urڈ j| nd d jd ur j| nd d jd ur j| nd d jd ur jn1d d jd j d j!d  j"d! j#d" j$d# j%d$ j&d% j' fd&d't(D S d jd j d j!d  j"d! j#d" j$d# j%d$ j&d% j' fd&d't(D S ))NrS   rU   rW   rY   rZ   r[   r]   r   r^   r_   r`   ra   rc   rd   re   rf   rg   rj   rk   rm   rn   ro   rp   rq   rr   rs   rt   rv   rx   ry   rz   r|   r}   r~   r   r   r   c                      i | ]}|t  |qS r   getattrr"   r   r+   r   r$   
<dictcomp>      
z0GenerateReqInput.__getitem__.<locals>.<dictcomp>r   ))rR   rS   rU   rW   rY   rZ   r[   r]   r   r^   r_   r`   ra   rc   rd   re   r'   rf   r(   rg   rj   rk   rm   rn   ro   rp   rq   rr   rs   rt   rv   rx   ry   rz   r|   r}   r~   r   r   r    _API_SERVING_TIMING_MIXIN_FIELDSr,   r   r   r+   r$   __getitem__o  s   



	








#&)
-
146789:;<=>
46789:;<=>
zGenerateReqInput.__getitem__r   rb   )Dr.   r/   r0   rS   r1   rU   rW   rY   rZ   r[   r]   r^   r_   r`   ra   rc   rd   re   rf   rg   ri   rj   rk   rm   rn   ro   rp   rq   rr   rs   rt   ru   rv   rw   rx   ry   rz   r{   r|   r}   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r$   rR      s   
 

*			)"rR   c                   @  sj  e Zd ZU ded< ded< ded< ded< d	ed
< ded< ded< ded< d	ed< dZd	ed< dZd	ed< dZded< dZded< dZded< dZ	ded< dZ
ded< dZded< dZded< dZded < dZded!< dZded"< dZd	ed#< dZded$< dZded%< dZded&< dZded'< dZd	ed(< dZd)ed*< dZd	ed+< dZd	ed,< dZd	ed-< dZd.ed/< dS )0TokenizedGenerateReqInputr   
input_textrA   rU   r   	mm_inputsr   r]   rb   r^   rh   r_   r`   ra   rd   Frf   rg   r   ri   NrV   rW   zOptional[SessionParams]rk   r   rn   ro   rp   rM   rq   rr   rs   rt   ru   rv   ry   rz   r{   r|   r   trace_contextr~   r   r   r   r   )r.   r/   r0   r1   rf   rg   ri   rW   rk   rn   ro   rp   rq   rr   rs   rt   ru   rv   ry   rz   r{   r|   r   r~   r   r   r   r   r   r   r$   r     sB   
 r   c                   @  .   e Zd ZU ded< dd Zdd Zdd Zd	S )
BatchTokenizedGenerateReqInputzList[TokenizedGenerateReqInput]batchc                 C  
   t | jS r   r*   r   r+   r   r   r$   __len__     
z&BatchTokenizedGenerateReqInput.__len__c                 C  
   | j | S r   r   r   r   r   r$   r     r   z*BatchTokenizedGenerateReqInput.__getitem__c                 C  r   r   iterr   r+   r   r   r$   __iter__  r   z'BatchTokenizedGenerateReqInput.__iter__Nr.   r/   r0   r1   r   r   r   r   r   r   r$   r     
   
 r   c                   @  s  e Zd ZU dZded< dZded< dZded< dZded< dZded	< dZ	d
ed< dZ
ded< dZded< dZded< dZded< dZded< dZded< dZded< dZded< dZded< dZded< dZded < dZded!< d"d# Zd$d% Zd+d'd(Zd)d* ZdS ),EmbeddingReqInputNz0Optional[Union[List[List[str]], List[str], str]]rS   rX   rY   rZ   r[   rT   rU   r\   r]   rV   rW   Trb   re   r4   rj   rG   rH   Fis_cross_encoder_requestrM   ry   r   r{   rw   r   r   
dimensionsrl   rm   rn   c                 C  s  | j d u r| jd u r| jd u rtd| j d ur!| jd ur!tdd| _d| _| j d urGt| j tr@|  jt| j 7  _d| _n|  jd7  _| jd urit| jd trb|  jt| j7  _d| _n|  jd7  _| jr| j	d u rwt
 j| _	| jd u ri | _d| jd< d S | j	d u rdd	 t| jD | _	n
t| j	tsJ d
| jd u ri g| j | _nt| jtr| jg| j | _t| jD ]	}d| j| d< q| | j d S )Nz<At least one of text, input_ids, or image should be providedz6text and input_ids cannot be provided at the same timer   TFr   max_new_tokensc                 S  r   r   r   r!   r   r   r$   r%   n  r&   zCEmbeddingReqInput.normalize_batch_and_arguments.<locals>.<listcomp>zThe rid should be a list.)rS   rU   rY   r   r   r   r'   r(   r*   r   r   r   r    r]   r)   r   r   r   r   r   r$   r   F  sD   





z/EmbeddingReqInput.normalize_batch_and_argumentsc                 C  sn   | j dur5t| j tr| j g| | _ dS t| j tr1t| j |kr/tdt| j  d| ddS tddS )r   Nzlora_path list length (z) must match batch size ()r   )rm   r'   r   r(   r*   r   r   r   r   r$   r   {  s   
z'EmbeddingReqInput._normalize_lora_pathsr   c                 C  r   r   r   r+   r   r   r$   r     r   z#EmbeddingReqInput.contains_mm_inputc                   s<   j r4t jd ur j| gnd  j|  j|  jd ur" j| nd  jd ur- j| nd d jdS td jd ur@ j| nd  jd urK j| nd  j	d urV j	| nd  j
d ura j
| nd  jd url j| nd  j|  j|  jd ur j| nd  jd ur j| nd  j j jd fddtD S )NT)rS   r]   r   rm   rn   r   r   )rS   rU   rY   r[   rZ   r]   r   rm   rn   r   r   r   c                   r   r   r   r   r+   r   r$   r     r   z1EmbeddingReqInput.__getitem__.<locals>.<dictcomp>r   )r   r   rS   r]   r   rm   rn   r   rU   rY   r[   rZ   r   r   r   r   r   r+   r$   r     s8   

zEmbeddingReqInput.__getitem__r   )r.   r/   r0   rS   r1   rY   rZ   r[   rU   r]   rW   re   rj   rH   r   ry   r{   rw   r   r   rm   rn   r   r   r   r   r   r   r   r$   r     s.   
 5
r   c                   @  sf   e Zd ZU ded< ded< ded< ded< ded	< d
Zded< d
Zded< d
Zded< d
Zded< d
S )TokenizedEmbeddingReqInputr   r   rA   rU   r   image_inputstoken_type_idsr   r]   NrM   rv   ry   r   r   rn   )r.   r/   r0   r1   rv   ry   r   rn   r   r   r   r$   r     s   
 r   c                   @  r   )
BatchTokenizedEmbeddingReqInputz List[TokenizedEmbeddingReqInput]r   c                 C  r   r   r   r+   r   r   r$   r     r   z'BatchTokenizedEmbeddingReqInput.__len__c                 C  r   r   r   r   r   r   r$   r     r   z+BatchTokenizedEmbeddingReqInput.__getitem__c                 C  r   r   r   r+   r   r   r$   r     r   z(BatchTokenizedEmbeddingReqInput.__iter__Nr   r   r   r   r$   r     r   r   c                   @  s&  e Zd ZU ded< ded< ded< ded< ded	< d
ed< d
ed< d
ed< ded< ded< ded< ded< ded< ded< ded< ded< ded< ded< ded< ded< ded< ded< ded< ded< d ed!< d"ed#< d$ed%< d$ed&< ded'< d(Zd)ed*< d(Zd+ed,< d(Zd-ed.< d(Zd/ed0< d(S )1BatchTokenIDOutputList[BaseFinishReason]finished_reasons	List[str]decoded_textsrA   
decode_idsread_offsetsOptional[List[int]]
output_ids
List[bool]skip_special_tokensspaces_between_special_tokensno_stop_trimprompt_tokenscompletion_tokenscached_tokensList[float]input_token_logprobs_valinput_token_logprobs_idxoutput_token_logprobs_valoutput_token_logprobs_idx
List[List]input_top_logprobs_valinput_top_logprobs_idxoutput_top_logprobs_valoutput_top_logprobs_idxinput_token_ids_logprobs_valinput_token_ids_logprobs_idxoutput_token_ids_logprobs_valoutput_token_ids_logprobs_idxoutput_token_entropy_valList[List[float]]output_hidden_statesList[Optional[torch.Tensor]]routed_expertsList[Optional[List[int]]]placeholder_tokens_idxplaceholder_tokens_valretraction_countsNrD   token_stepsGetLoadReqOutputloadOptional[Dict[str, List[Any]]]customized_info(Optional[List[Optional[Dict[str, Any]]]]cached_tokens_detailsr.   r/   r0   r1   r  r  r  r  r   r   r   r$   r     sD   
 r   c                   @  s   e Zd ZU ded< ded< ded< ded< ded< ded< d	ed
< d	ed< ded< ded< ded< ded< ded< ded< ded< ded< d	ed< dZded< dS )BatchMultimodalDecodeReqrA   decoded_idsr   r   r   r   r   r   r   r   r   rD   image_resolutionsresize_image_resolutionsr   r   r   r   r   r  r	  r
  r~   Nr  )r.   r/   r0   r1   r  r   r   r   r$   r    s&   
 r  c                   @  s   e Zd ZU ded< ded< ded< ded< ded	< ded
< ded< ded< ded< ded< ded< ded< ded< ded< ded< ded< ded< ded< ded< ded< ded< ded< ded < ded!< d"Zd#ed$< d"Zd%ed&< d"Zd'ed(< d"Zd)ed*< d"S )+BatchStrOutput
List[dict]r   r   output_strsr   r   rA   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r  r  r	  r
  r  NrD   r  r  r  r  r  r  r  r  r   r   r   r$   r  4  s:   
 r  c                   @  s   e Zd ZU ded< ded< ded< ded< ded	< ded
< ded< ded< ded< ded< ded< ded< ded< dZded< dS )BatchMultimodalOutputr  r   rD   r  z*Union[List[str | bytes], List[List[Dict]]]outputsr  r   r   r   r   rA   r   r   r   r  r	  r
  r   r~   Nr  r  r.   r/   r0   r1   r  r   r   r   r$   r  o  s   
 r  c                   @  sR   e Zd ZU ded< ded< ded< ded< ded	< ded
< ded< dZded< dS )BatchEmbeddingOutputr   r   z0Union[List[List[float]], List[Dict[int, float]]]
embeddingsrA   r   r   r  r	  r
  r  Nr  r  r  r   r   r   r$   r    s   
 r  c                   @     e Zd ZdS )ClearHiCacheReqInputNr.   r/   r0   r   r   r   r$   r!        r!  c                   @     e Zd ZU ded< dS )ClearHiCacheReqOutputrb   successNr.   r/   r0   r1   r   r   r   r$   r%       
 r%  c                   @  r   )FlushCacheReqInputNr"  r   r   r   r$   r)    r#  r)  c                   @  r$  )FlushCacheReqOutputrb   r&  Nr'  r   r   r   r$   r*    r(  r*  c                   @  sF   e Zd ZU dZded< dZded< dZded< dZded< d	d
 ZdS )AttachHiCacheStorageReqInputaI  Dynamically attach (enable) HiCache storage backend at runtime.

    Note: `hicache_storage_backend_extra_config_json` is a JSON string. It may contain both:
    - backend-specific configs (e.g., mooncake master address)
    - prefetch-related knobs (prefetch_threshold, prefetch_timeout_*, hicache_storage_pass_prefix_keys)
    r   hicache_storage_backendNr   )hicache_storage_backend_extra_config_jsonhicache_storage_prefetch_policyhicache_write_policyc                 C  sr   | j d u rng d}| j |vrtd| j d| d| jd u r"d S g d}| j|vr7td| jd| dd S )N)best_effortwait_completetimeoutz)Invalid hicache_storage_prefetch_policy: . Expected one of .)
write_backwrite_throughwrite_through_selectivezInvalid hicache_write_policy: )r.  r   r/  r,   allowedr   r   r$   __post_init__  s&   





z*AttachHiCacheStorageReqInput.__post_init__)	r.   r/   r0   r?   r1   r-  r.  r/  r:  r   r   r   r$   r+    s   
 r+  c                   @  "   e Zd ZU ded< dZded< dS )AttachHiCacheStorageReqOutputrb   r&   r   messageNr.   r/   r0   r1   r>  r   r   r   r$   r<       
 r<  c                   @  s   e Zd ZdZdS )DetachHiCacheStorageReqInputz@Dynamically detach (disable) HiCache storage backend at runtime.N)r.   r/   r0   r?   r   r   r   r$   rA    s    rA  c                   @  r;  )DetachHiCacheStorageReqOutputrb   r&  r=  r   r>  Nr?  r   r   r   r$   rB    r@  rB  c                   @  s&   e Zd ZU dZdZded< dd ZdS )PauseGenerationReqInputa  
    Note that the PauseGenerationRequests is only supported in SGLang Server.
    abort: Abort and return all requests currently being processed.

    in_place: Pause the scheduler's event_loop from performing inference;
            only non-inference requests (e.g., control commands) will be handled.
            The requests in the engine will be paused and stay in the event_loop,
            then continue generation after continue_generation with the old kv cache.
            Note: In 'inplace' mode, flush_cache will fail if there are any requests
            in the running_batch.

    retract: Pause the scheduler's event loop from performing inference;
            only non-inference requests will be handled, and all currently running
            requests will be retracted back to the waiting_queue.
            Note: The KV cache can be flushed in this mode and will be automatically
            recomputed after continue_generation.
    abortz'Literal['abort', 'retract', 'in_place']modec                 C  s.   g d}| j |vrtd| j d| dd S )N)rD  retractin_placezInvalid mode: r3  r4  )rE  r   r8  r   r   r$   r:    s   
z%PauseGenerationReqInput.__post_init__N)r.   r/   r0   r?   rE  r1   r:  r   r   r   r$   rC    s   
 rC  c                   @  r   )ContinueGenerationReqInputNr"  r   r   r   r$   rH    r#  rH  c                   @  s   e Zd ZU ded< dZded< dZded< dZded	< dZded
< dZded< dZ	ded< dZ
ded< dZded< dZded< dZded< dS )UpdateWeightFromDiskReqInputr   
model_pathNr   load_formatFrb   abort_all_requestsweight_versionis_asynctorch_empty_cache
keep_pauserecapture_cuda_graphr   rh   
token_stepTflush_cacheOptional[Dict[str, Any]]manifest)r.   r/   r0   r1   rK  rL  rM  rN  rO  rP  rQ  rR  rS  rU  r   r   r   r$   rI  
  s   
 rI  c                   @  *   e Zd ZU ded< ded< dZded< dS )	UpdateWeightFromDiskReqOutputrb   r&  r   r>  r   rM   num_paused_requestsN)r.   r/   r0   r1   rX  r   r   r   r$   rW  $  s   
 rW  c                   @  sb   e Zd ZU ded< ded< ded< dZded< d	Zd
ed< dZd
ed< dZded< dZded< dS )$UpdateWeightsFromDistributedReqInputr   namesdtypesrD   shapesweight_update_groupr   
group_nameTrb   rS  FrL  Nr   rM  rK  )	r.   r/   r0   r1   r^  rS  rL  rM  rK  r   r   r   r$   rY  ,  s   
 rY  c                   @     e Zd ZU ded< ded< dS )%UpdateWeightsFromDistributedReqOutputrb   r&  r   r>  Nr'  r   r   r   r$   r`  =     
 r`  c                   @  sJ   e Zd ZU dZded< dZded< dZded	< d
Zded< dZded< dS )UpdateWeightsFromTensorReqInputzUpdate model weights from tensor input.

    - Tensors are serialized for transmission
    - Data is structured in JSON for easy transmission over HTTP
    zList[Union[str, bytes]]serialized_named_tensorsNr   rK  Trb   rS  FrL  rM  )	r.   r/   r0   r?   r1   rK  rS  rL  rM  r   r   r   r$   rb  C  s   
 rb  c                   @  r_  ) UpdateWeightsFromTensorReqOutputrb   r&  r   r>  Nr'  r   r   r   r$   rd  V  ra  rd  c                   @  sF   e Zd ZU ded< ded< ded< ded< dZded< d	Zded
< dS )-InitWeightsSendGroupForRemoteInstanceReqInputr   master_addressportsrh   
group_rank
world_sizeweight_send_groupr^  ncclbackendNr.   r/   r0   r1   r^  rl  r   r   r   r$   re  \     
 re  c                   @  s.   e Zd ZU ded< dZded< dZded< dS )	UpdateWeightsFromIPCReqInputzDict[str, str]zmq_handlesTrb   rS  Nr   rM  )r.   r/   r0   r1   rS  rM  r   r   r   r$   ro  n  s   
 ro  c                   @  r_  )UpdateWeightsFromIPCReqOutputrb   r&  r   r>  Nr'  r   r   r   r$   rq  x  ra  rq  c                   @  r_  ).InitWeightsSendGroupForRemoteInstanceReqOutputrb   r&  r   r>  Nr'  r   r   r   r$   rr  ~  ra  rr  c                   @  s*   e Zd ZU ded< ded< dZded< dS )#SendWeightsToRemoteInstanceReqInputr   rf  rg  rj  r^  N)r.   r/   r0   r1   r^  r   r   r   r$   rs    s   
 rs  c                   @  r_  )$SendWeightsToRemoteInstanceReqOutputrb   r&  r   r>  Nr'  r   r   r   r$   rt    ra  rt  c                   @  sF   e Zd ZU ded< ded< ded< ded< dZded< d	Zded
< dS )InitWeightsUpdateGroupReqInputr   rf  rh   master_portrank_offsetri  r]  r^  rk  rl  Nrm  r   r   r   r$   ru    rn  ru  c                   @  r_  )InitWeightsUpdateGroupReqOutputrb   r&  r   r>  Nr'  r   r   r   r$   rx    ra  rx  c                   @  s   e Zd ZU dZded< dS )!DestroyWeightsUpdateGroupReqInputr]  r   r^  N)r.   r/   r0   r^  r1   r   r   r   r$   ry    s   
 ry  c                   @  r_  )"DestroyWeightsUpdateGroupReqOutputrb   r&  r   r>  Nr'  r   r   r   r$   rz    ra  rz  c                   @  r;  )UpdateWeightVersionReqInputr   new_versionTrb   rL  N)r.   r/   r0   r1   rL  r   r   r   r$   r{    s   
 r{  c                   @  r;  )GetWeightsByNameReqInputr   named   rh   truncate_sizeN)r.   r/   r0   r1   r  r   r   r   r$   r}    r@  r}  c                   @  r$  )GetWeightsByNameReqOutputr(   	parameterNr'  r   r   r   r$   r    r(  r  c                   @     e Zd ZU dZded< dS )ReleaseMemoryOccupationReqInputNr4   tagsr.   r/   r0   r  r1   r   r   r   r$   r       
 r  c                   @  r   ) ReleaseMemoryOccupationReqOutputNr"  r   r   r   r$   r    r#  r  c                   @  r  )ResumeMemoryOccupationReqInputNr4   r  r  r   r   r   r$   r    r  r  c                   @  r   )ResumeMemoryOccupationReqOutputNr"  r   r   r   r$   r    r#  r  c                   @  r$  )CheckWeightsReqInputr   actionNr'  r   r   r   r$   r    r(  r  c                   @  r_  )CheckWeightsReqOutputrb   r&  r   r>  Nr'  r   r   r   r$   r    ra  r  c                   @  r$  )SlowDownReqInputrG   forward_sleep_timeNr'  r   r   r   r$   r    r(  r  c                   @  r   )SlowDownReqOutputNr"  r   r   r   r$   r    r#  r  c                   @  s:   e Zd ZU dZded< dZded< dZded< d	d
 ZdS )AbortReqFrb   	abort_allNrT  finished_reasonr   abort_messagec                 C  s   | j d u r
d| _ d S d S )Nr=  r   r+   r   r   r$   r:    s   

zAbortReq.__post_init__)r.   r/   r0   r  r1   r  r  r:  r   r   r   r$   r    s
   
 r  c                   @  r$  )ActiveRanksOutputr   statusNr'  r   r   r   r$   r    r(  r  c                   @  r   )GetInternalStateReqNr"  r   r   r   r$   r    r#  r  c                   @  r$  )GetInternalStateReqOutputzDict[Any, Any]internal_stateNr'  r   r   r   r$   r    r(  r  c                   @  r$  )SetInternalStateReqDict[str, Any]server_argsNr'  r   r   r   r$   r    r(  r  c                   @  r_  )SetInternalStateReqOutputrb   updatedr  r  Nr'  r   r   r   r$   r    ra  r  c                   @  s   e Zd ZU dZded< dZded< dZded< dZded< d	Zd
ed< dZ	ded< dZ
ded< d	Zd
ed< dZded< dZded< dS )ProfileReqInputNr   
output_dirrM   
start_step	num_stepsr4   
activitiesFrb   profile_by_stagerO   
with_stackrecord_shapesmerge_profilesprofile_prefixprofile_stages)r.   r/   r0   r  r1   r  r  r  r  r  r  r  r  r  r   r   r   r$   r    s   
 r  c                   @     e Zd ZdZdZdS )ProfileReqTyper      N)r.   r/   r0   START_PROFILESTOP_PROFILEr   r   r   r$   r  7      r  c                   @  s   e Zd ZU ded< dZded< dZded< dZded< dZd	ed
< dZded< dZ	ded< dZ
ded< dZded< dZded< dZded< dZd	ed< dS )
ProfileReqr  typeNr   r  rM   r  r  r4   r  Frb   r  rO   r  r  
profile_idr  r  r  )r.   r/   r0   r1   r  r  r  r  r  r  r  r  r  r  r  r   r   r   r$   r  <  s   
 r  c                   @  r_  )ProfileReqOutputrb   r&  r   r>  Nr'  r   r   r   r$   r  L  ra  r  c                   @  r   )FreezeGCReqNr"  r   r   r   r$   r  R  r#  r  c                   @  sV   e Zd ZU dZded< dZded< dZded< dZded< dZded	< dZ	ded
< dS )ConfigureLoggingReqNrO   log_requestsrM   log_requests_levelr   log_requests_formatdump_requests_folderdump_requests_thresholdcrash_dump_folder)
r.   r/   r0   r  r1   r  r  r  r  r  r   r   r   r$   r  W  s   
 r  c                   @  "   e Zd ZU ded< dZded< dS )OpenSessionReqInputrh   capacity_of_str_lenNr   
session_id)r.   r/   r0   r1   r  r   r   r   r$   r  a  r@  r  c                   @  r$  )CloseSessionReqInputr   r  Nr'  r   r   r   r$   r  g  r(  r  c                   @  r_  )OpenSessionReqOutputr   r  rb   r&  Nr'  r   r   r   r$   r  l  ra  r  c                   @  r   )HealthCheckOutputNr"  r   r   r   r$   r  r  r#  r  c                   @  s   e Zd ZdZdZdZdS )ExpertDistributionReqTyper   r     N)r.   r/   r0   START_RECORDSTOP_RECORDDUMP_RECORDr   r   r   r$   r  w  s    r  c                   @  r$  )ExpertDistributionReqr  r  Nr'  r   r   r   r$   r  }  r(  r  c                   @  r   )ExpertDistributionReqOutputNr"  r   r   r   r$   r    r#  r  c                   @  s2   e Zd ZU dZded< dZded< dZded< dS )FunctionNr   descriptionr~  zOptional[object]
parameters)r.   r/   r0   r  r1   r~  r  r   r   r   r$   r    s   
 r  c                   @  s"   e Zd ZU ded< dZded< dS )Toolr  functionr   r  N)r.   r/   r0   r1   r  r   r   r   r$   r    r@  r  c                   @  s4   e Zd ZU ded< eedZded< dZded< dS )	ParseFunctionCallReqr   rS   default_factoryz
List[Tool]toolsNr   tool_call_parser)r.   r/   r0   r1   r   r(   r  r  r   r   r   r$   r    s   
 r  c                   @  s   e Zd ZU ded< ded< dS )SeparateReasoningReqInputr   rS   reasoning_parserNr'  r   r   r   r$   r    ra  r  c                   @  r  )VertexGenerateReqInputr  	instancesNzOptional[dict]r  r.   r/   r0   r1   r  r   r   r   r$   r    r@  r  c                   @  r  )RpcReqInputr   methodNr   r  r  r   r   r   r$   r    r@  r  c                   @  r_  )RpcReqOutputrb   r&  r   r>  Nr'  r   r   r   r$   r    ra  r  c                   @  s@   e Zd ZU ded< ded< dZded< dZded	< dddZdS )LoadLoRAAdapterReqInputr   	lora_namerm   Frb   pinnedNr   rn   r   r   c                 C  s   t | j| j| j| jdS )Nrn   r  rm   r  )r   rn   r  rm   r  r+   r   r   r$   to_ref  s   zLoadLoRAAdapterReqInput.to_refr   r   )r.   r/   r0   r1   r  rn   r  r   r   r   r$   r    s   
 r  c                   @  s,   e Zd ZU ded< dZded< d
dd	ZdS )UnloadLoRAAdapterReqInputr   r  Nr   rn   r   r   c                 C  s   t | j| jdS )N)rn   r  )r   rn   r  r+   r   r   r$   r    s   z UnloadLoRAAdapterReqInput.to_refr  )r.   r/   r0   r1   rn   r  r   r   r   r$   r    s   
 r  c                   @  sT   e Zd ZU ded< ded< ded< dZded< d	Zd
ed< d	Zded< dddZd	S )"LoadLoRAAdapterFromTensorsReqInputr   r  r  config_dictserialized_tensorsFrb   r  NrT  added_tokens_configr   rn   r   r   c                 C  s   t | j| jd| jdS )N
__tensor__r  )r   rn   r  r  r+   r   r   r$   r    s   z)LoadLoRAAdapterFromTensorsReqInput.to_refr  )r.   r/   r0   r1   r  r  rn   r  r   r   r   r$   r    s   
 r  c                   @  s.   e Zd ZU ded< dZded< dZded< dS )LoRAUpdateOutputrb   r&  Nr   error_messagezOptional[Dict[str, LoRARef]]loaded_adapters)r.   r/   r0   r1   r  r  r   r   r   r$   r    s   
 r  c                   @  r  )BlockReqTyper   r  N)r.   r/   r0   BLOCKUNBLOCKr   r   r   r$   r    r  r  c                   @  r$  )BlockReqInputr  r  Nr'  r   r   r   r$   r    r(  r  c                   @  r   )GetLoadReqInputNr"  r   r   r   r$   r    r#  r  c                   @  s6   e Zd ZU ded< ded< ded< ded< ded< dS )	r  rh   dp_ranknum_reqsnum_waiting_reqs
num_tokensr   ts_ticNr'  r   r   r   r$   r    s   
 r  c                   @  sj   e Zd ZU dZeddidZded< eddidZded< edd	idZded
< eddidZ	ded< dS )MemoryMetricszMemory breakdown metrics.metric)gaugezModel weight memory in GBmetadatar   	weight_gb)r  zKV cache memory in GBkv_cache_gb)r  zCUDA graph memory in GBgraph_gb)r  zMax tokens in KV cacherh   token_capacityN)
r.   r/   r0   r?   r   r  r1   r  r  r  r   r   r   r$   r    s   
 r  c                   @  s>   e Zd ZU dZeddidZded< eddidZded< d	S )
SpeculativeMetricszSpeculative decoding metrics.r  )r  zAvg accepted tokens per stepr  r   accept_length)r  zSpeculative acceptance rateaccept_rateN)r.   r/   r0   r?   r   r  r1   r   r   r   r   r$   r    s   
 r  c                   @  sT   e Zd ZU dZeddidZded< eddidZded< edd	idZd
ed< dS )LoRAMetricszLoRA adapter pool metrics.r  )r  zLoRA adapter slots in user  rh   
slots_used)r  zTotal LoRA adapter slotsslots_total)r  zLoRA pool utilization ratior   utilizationN)	r.   r/   r0   r?   r   r  r1   r  r  r   r   r   r$   r  )  s   
 r  c                   @  s   e Zd ZU dZded< edddidZded	< eddd
idZded< edddidZded< edddidZ	ded< edddidZ
ded< edddidZded< edddidZded< dS )DisaggregationMetricszPD disaggregation metrics.r   rE  r   r  )r  zPrefill prealloc queue requests)r   r  rh   prefill_prealloc_queue_reqs)r  zPrefill inflight queue requestsprefill_inflight_queue_reqs)r  zDecode prealloc queue requestsdecode_prealloc_queue_reqs)r  zDecode transfer queue requestsdecode_transfer_queue_reqs)r  zDecode retracted queue requestsdecode_retracted_queue_reqsg        )r  zKV transfer speed in GB/sr   kv_transfer_speed_gb_s)r  zKV transfer latency in mskv_transfer_latency_msN)r.   r/   r0   r?   r1   r   r  r  r  r	  r
  r  r  r   r   r   r$   r  4  s0   
 r  c                   @  sj   e Zd ZU dZeddidZded< eddidZded< edd	idZded
< eddidZ	ded< dS )QueueMetricszDetailed queue breakdown.r  )r  zMain waiting queue sizer  rh   waiting)r  zGrammar compilation queue sizegrammar)r  zRequests paused by weight syncpaused)r  zRetracted requests count	retractedN)
r.   r/   r0   r?   r   r  r1   r  r  r  r   r   r   r$   r  P  s   
 r  c                   @  sH   e Zd ZU dZeh dZedd dZded< dZ	d	ed
< dd Z
dS )GetLoadsReqInputzRequest for /v1/loads endpoint.>   allcoreloraspecdisaggmemoryqueuesc                   C  s   dgS )Nr  r   r   r   r   r$   <lambda>f  s    zGetLoadsReqInput.<lambda>r  r   includeNrM   r  c                 C  s<   | j rt| j | j }|rtd| dt| j dS dS )zValidate include sections.zInvalid include sections: z. Valid options: N)r  setVALID_SECTIONSr   sorted)r,   invalidr   r   r$   r:  i  s   zGetLoadsReqInput.__post_init__)r.   r/   r0   r?   	frozensetr  r   r  r1   r  r:  r   r   r   r$   r  ^  s   
 r  c                   @  s$  e Zd ZU dZded< ded< eddidZded	< edd
idZded< eddidZded< eddidZ	ded< eddidZ
ded< eddidZded< eddidZded< eddidZded< eddidZded< dZded< dZded< dZded < dZd!ed"< dZd#ed$< dS )%GetLoadsReqOutputz0Per-DP-rank load metrics for /v1/loads endpoint.rh   r  r   	timestampr  )r  zNumber of running requestsr  num_running_reqs)r  zNumber of waiting requestsr  )r  zNumber of tokens in usenum_used_tokens)r  zMaximum token capacitymax_total_num_tokens)r  zToken pool usage ratiotoken_usage)r  z Generation throughput tokens/secgen_throughput)r  zPrefix cache hit ratecache_hit_rate)r  zOverall utilization ratior  )r  z!Maximum running requests capacitymax_running_requestsNzOptional[MemoryMetrics]r  zOptional[SpeculativeMetrics]speculativezOptional[LoRAMetrics]r  zOptional[DisaggregationMetrics]disaggregationzOptional[QueueMetrics]r  )r.   r/   r0   r?   r1   r   r#  r  r$  r%  r&  r'  r(  r  r)  r  r*  r  r+  r  r   r   r   r$   r!  t  sD   
 r!  c                   @  r$  )WatchLoadUpdateReqzList[GetLoadReqOutput]loadsNr'  r   r   r   r$   r,    r(  r,  c                   @  r$  )SetInjectDumpMetadataReqInputr  dump_metadataNr'  r   r   r   r$   r.    r(  r.  c                   @  r$  )SetInjectDumpMetadataReqOutputrb   r&  Nr'  r   r   r   r$   r0    r(  r0  c                   @  r   )LazyDumpTensorsReqInputNr"  r   r   r   r$   r1    r#  r1  c                   @  r$  )LazyDumpTensorsReqOutputrb   r&  Nr'  r   r   r   r$   r2    r(  r2  c                   @  r_  )DumperControlReqInputr   r  r  bodyNr'  r   r   r   r$   r3    ra  r3  c                   @  rV  )	DumperControlReqOutputrb   r&  zList[Dict[str, Any]]responser=  r   errorN)r.   r/   r0   r1   r7  r   r   r   r$   r5    s   
 r5  c                  C  s   ddl } ddl}| |jt | j}|D ]9}|d }|dp(|dp(|d}t|d tp6t|d t	}|rB|sBt
| d|rM|sMt
| dqdS )	zFA helper function to check all request types are defined in this file.r   NReqInputOutputr   z. is not a subclass of BaseReq or BaseBatchReq.z? is a subclass of BaseReq but not follow the naming convention.)inspectsys
getmembersmodulesr.   isclassendswith
issubclassr   r3   r   )r;  r<  all_classes
class_typer~  is_io_structis_base_reqr   r   r$   _check_all_req_types  s$   rF  )r?   
__future__r   r   r   abcr   dataclassesr   r   enumr   typingr   r   r	   r
   r   r   r   torchsglang.srt.lora.lora_registryr   "sglang.srt.managers.schedule_batchr   sglang.srt.multimodal.mm_utilsr   #sglang.srt.sampling.sampling_paramsr   sglang.srt.utilsr   	PIL.Imager   r   r3   r8   r@   rF   tuple__dataclass_fields__keysr   rK   r   ImageDataInputItemAudioDataInputItemVideoDataInputItemMultimodalDataInputItemMultimodalDataInputFormatrR   r   r   r   r   r   r   r  r  r  r  r!  r%  r)  r*  r+  r<  rA  rB  rC  rH  rI  rW  rY  r`  rb  rd  re  ro  rq  rr  rs  rt  ru  rx  ry  rz  r{  r}  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  LoadLoRAAdapterReqOutputUnloadLoRAAdapterReqOutput#LoadLoRAAdapterFromTensorsReqOutputr  r  r  r  r  r  r  r  r  r  r!  r,  r.  r0  r1  r2  r3  r5  rF  r   r   r   r$   <module>   s  $
$

    P 
?
:"			

'
