o
    پi                      @   s  d Z ddlZddlZddlZddlZddlZddlZddlZddlZddl	m
Z
 ddlmZmZ ddlmZmZmZ ddlZddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZmZ ddlZddl m!Z! ddl"m#Z#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z* ddl+m,Z, ddl-m.Z.m/Z/m0Z0 ddl1m2Z3 ddl4m5Z5 ddl6m7Z7 ddl8m9Z9 e:e;Z<e=e>ddZ?de.dej@fddZAdeBdejCfddZDG d d! d!ejEZF	d+d"e5d#ee fd$d%ZGd"e5fd&d'ZH	d+d"e5d(ee( fd)d*ZIdS ),z
Standalone gRPC Server for SGLang - Fully separated from HTTP server.
Uses GrpcRequestManager for orchestration without tokenization.
    N)futures)datetimetimezone)AsyncIteratorDictOptional)MessageToDict)Struct)	Timestamp)health_pb2_grpc)
reflection)sglang_scheduler_pb2sglang_scheduler_pb2_grpc)ModelConfig)FAKE_BOOTSTRAP_HOSTDisaggregationMode)GrpcRequestManager)SGLangHealthServicer)launch_scheduler_process_only)start_disagg_service)GetLoadsReqOutputTokenizedEmbeddingReqInputTokenizedGenerateReqInput)SamplingParams)
ServerArgs)kill_process_tree)get_exception_tracebackSGLANG_HEALTH_CHECK_TIMEOUT   resultreturnc                 C   s6  t j| j| j| j| j| j | j| j| j| j| j	| j
| jd}| jr6|jt j| jj| jj| jj| jjd | jrH|jt j| jj| jjd | jr]|jt j| jj| jj| jj
d | jr|jt j| jj| jj| jj| jj | jj!| jj"| jj#| jj$d | j%r|j%t j&| j%j'| j%j(| j%j)| j%j*d |S )zFConvert GetLoadsReqOutput dataclass to protobuf SchedulerLoad message.)dp_ranknum_running_reqsnum_waiting_reqsnum_total_reqsnum_used_tokensmax_total_num_tokenstoken_usagegen_throughputcache_hit_rateutilizationmax_running_requests)	weight_gbkv_cache_gbgraph_gbtoken_capacity)accept_lengthaccept_rate)
slots_usedslots_totalr*   )modeprefill_prealloc_queue_reqsprefill_inflight_queue_reqsdecode_prealloc_queue_reqsdecode_transfer_queue_reqsdecode_retracted_queue_reqskv_transfer_speed_gb_skv_transfer_latency_ms)waitinggrammarpaused	retracted)+r   SchedulerLoadr!   r"   r#   r%   r&   r'   r(   r)   r*   r+   memoryCopyFromMemoryMetricsr,   r-   r.   r/   speculativeSpeculativeMetricsr0   r1   loraLoRAMetricsr2   r3   disaggregationDisaggregationMetricsr4   r5   r6   r7   r8   r9   r:   r;   queuesQueueMetricsr<   r=   r>   r?   )r   scheduler_load rM   V/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/entrypoints/grpc_server.py_convert_loads_to_protobuf/   sx   
		rO   loadsc              
   C   s   | st  S t| }tdd | D }tdd | D }t j|||| ttdd | D | dttdd | D | dttdd | D | dd	S )
zGCompute aggregate metrics from list of SchedulerLoad protobuf messages.c                 s       | ]}|j V  qd S N)r"   .0loadrM   rM   rN   	<genexpr>       z._compute_aggregate_protobuf.<locals>.<genexpr>c                 s   rQ   rR   )r#   rS   rM   rM   rN   rV      rW   c                 s   rQ   rR   )r'   rS   rM   rM   rN   rV      rW      c                 s   rQ   rR   )r(   rS   rM   rM   rN   rV      rW      c                 s   rQ   rR   )r*   rS   rM   rM   rN   rV      rW   )total_running_reqstotal_waiting_reqs
total_reqsavg_token_usageavg_throughputavg_utilization)r   AggregateMetricslensumround)rP   ntotal_runningtotal_waitingrM   rM   rN   _compute_aggregate_protobufx   s   rg   c                   @   s  e Zd ZdZ	d2dededededee f
dd	Z	d
e
jdejjdee
j fddZd
e
jdejjde
jfddZd
e
jdejjde
jfddZd
e
jdejjde
jfddZde
jdejjde
jfddZde
jdejjde
jfddZ d
e
j!dejjde
j"fddZ#de
jde$fddZ%de
jde&fd d!Z'd"e
j(de)fd#d$Z*d%edee
j+ fd&d'Z,d%edee
j- fd(d)Z.d*e/d+ede
jfd,d-Z0d*e/d+ede
jfd.d/Z1d0d1 Z2dS )3SGLangSchedulerServicerz
    Standalone gRPC service implementation using GrpcRequestManager.
    Fully separated from HTTP server with its own process and no shared globals.
    Nrequest_managerserver_args
model_infoscheduler_infohealth_servicerc                 C   s@   || _ || _|| _|| _t | _|| _| j   t	d dS )z'Initialize the standalone gRPC service.z#gRPC scheduler servicer initializedN)
ri   rj   rk   rl   time
start_timerm   auto_create_handle_looploggerinfo)selfri   rj   rk   rl   rm   rM   rM   rN   __init__   s   	

z SGLangSchedulerServicer.__init__requestcontextr    c                 C  sx  t d|j  z|| |}| jj||j|d}|2 zf3 dH W }t|trP|D ]%}d|v rFtj	|jtj
|d d|vr>dnddd	V  q)| |j|V  q)qd|v rktj	|jtj
|d d|vrcdnddd	V  q|d
drz| |j|V  q| |j|V  q6 W dS  ty } z)t d|j d| dt   tj	|jtj
t|dt dd	V  W Y d}~dS d}~ww )z4Handle generation requests with streaming responses.zReceive generation request: )obj
request_idgrpc_contextNerrorabort500499)messagehttp_status_coderx   rz   finishedFzGenerate failed for request : 
)r~   r   details)rq   rr   rx   _convert_generate_requestri   generate_request
isinstancelistr   GenerateResponseGenerateError_create_completion_responseget_create_chunk_response	Exceptionrz   r   str)rs   ru   rv   tokenized_reqresponse_generatoroutputbatch_outputerM   rM   rN   Generate   sl   



	&z SGLangSchedulerServicer.Generate_contextc                    s   t d|j  z0| |}| jj||jdI dH }|I dH }tj|jtj|d |	dddt
|d ddW S  tym } z't d	|j d
| dt   tj|jtjt|dt ddW  Y d}~S d}~ww )zHandle embedding requests.zReceive embedding request: rw   rx   N	embeddingprompt_tokensr   )r   r   cached_tokensembedding_dimrx   completezEmbed failed for request r   r   INTERNAL_ERROR)r~   coder   r   )rq   rr   rx   _convert_embed_requestri   embedding_requestr   EmbedResponseEmbedCompleter   ra   r   rz   r   
EmbedErrorr   )rs   ru   r   r   futurer   r   rM   rM   rN   Embed   sD   




zSGLangSchedulerServicer.Embedc                    s  dt    td  jjr td tjdddS tddd	}|jd
d j	
d}|d
u r;jj }|r[tddg|ddddd
d
d
 jjtjjkrZt _d _nd|_tddgdg idg|d  fdd}t| }t   }t   |t k rtdI d
H  jj|kr|  j tjdddS t   |t k s|  j tdt d tjddt ddS )z
        Check the health of the inference server by sending a special request to generate one token.
        Similar to HTTP server's /health endpoint.
        HEALTH_CHECK_zReceive health check request: zCHealth check request received during shutdown. Returning unhealthy.FzServer is shutting down)healthyr~              )max_new_tokenstemperatureN	tokenizeris_generation r   )
rid
input_text	input_idssampling_paramsreturn_logproblogprob_start_lentop_logprobs_numstream	mm_inputstoken_ids_logprobmm_itemsr   r   r   image_inputstoken_type_idsr   c               
      sf   zj j d2 z	3 d H W }  W dS 6 W dS  ty2 } ztd|  W Y d }~dS d }~ww )Nr   TzHealth check failed: F)ri   r   r   rq   warning)_r   
health_reqr   rs   rM   rN   run_health_checkW  s   	z=SGLangSchedulerServicer.HealthCheck.<locals>.run_health_checkTzHealth check passedzHealth check timeout after s)rn   rq   rr   ri   gracefully_exitr   HealthCheckResponseSGLSamplingParams	normalizerl   r   rj   is_embeddingr   disaggregation_moder   NULLvaluer   bootstrap_hostbootstrap_roomr   r   asynciocreate_taskHEALTH_CHECK_TIMEOUTsleeplast_receive_tstampcancel_cleanup_request_stater   )rs   ru   rv   r   r   r   taskticrM   r   rN   HealthCheck  sv   	

z#SGLangSchedulerServicer.HealthCheckc                    s   t d|j  z| j|jI dH }tj|d|j d|r!dnd dW S  tyR } zt d|j d	| d
t	   tjdt
|dW  Y d}~S d}~ww )zAbort an ongoing request.zReceive abort request: NzRequest  abortedz	not found)successr~   zAbort failed for request r   r   F)rq   rr   rx   ri   abort_requestr   AbortResponser   rz   r   r   )rs   ru   r   r   r   rM   rM   rN   Abort{  s(   zSGLangSchedulerServicer.Abort_requestc                    s  t d | jd}|du r| jj }tjdi d| jjd| jj	p$dd|d| jj
p.dd| jjp5dd	| jjd
| jd
 d| jd d| jd d| jdpUdd| jdp^g d| jd d| jd d| jd d| jd d| jdpdd| jdpdS S )zGet model information.zReceive model info requestr   N
model_pathtokenizer_pathr   preferred_sampling_paramsweight_versionserved_model_namemax_context_length
vocab_sizesupports_vision
model_typearchitectureseos_token_idspad_token_idbos_token_idmax_req_input_lenid2label_json
num_labelsr   rM   )rq   debugrl   r   rj   r   r   GetModelInfoResponser   r   r   r   r   rk   )rs   r   r   r   rM   rM   rN   GetModelInfo  s`   






	






z$SGLangSchedulerServicer.GetModelInfoc           
         s   t d t| j}t } fdd  |}|| t }|| j | j	 }t

 | j }t }	|	t| j tj|||d |d |d |tjd|	d	S )	zGet server information.zReceive server info requestc                    sl   | d u rd S t | ttttfr| S t | tttfr" fdd| D S t | tr2 fdd| 	 D S t| S )Nc                    s   g | ]} |qS rM   rM   )rT   itemmake_serializablerM   rN   
<listcomp>      zTSGLangSchedulerServicer.GetServerInfo.<locals>.make_serializable.<locals>.<listcomp>c                    s   i | ]	\}}| |qS rM   rM   )rT   kvr   rM   rN   
<dictcomp>  s    zTSGLangSchedulerServicer.GetServerInfo.<locals>.make_serializable.<locals>.<dictcomp>)
r   r   intfloatboolr   tuplesetdictitems)rw   r   rM   rN   r     s   
z@SGLangSchedulerServicer.GetServerInfo.<locals>.make_serializableactive_requestsr>   last_receive_timegrpc)	rj   rl   r  	is_pausedlast_receive_timestampuptime_secondssglang_versionserver_typero   )rq   r   dataclassesasdictrj   r	   updaterl   ri   get_server_inforn   ro   r
   FromSecondsr   r   GetServerInfoResponsesglang__version__)
rs   r   r   server_args_dictserver_args_structserializable_argsscheduler_info_structmanager_stateuptimestart_timestamprM   r   rN   GetServerInfo  s0   


z%SGLangSchedulerServicer.GetServerInfoc              
      sd  t d |jrt|jndg}|dr|jnd}z| jj||dI dH }W nm tyL } z|	t
jj |t| t W  Y d}~S d}~w tjye   |	t
jj |d t  Y S  ty } z%t d| dt   |	t
jj |d	|  t W  Y d}~S d}~ww d
d |D }tjttj tjt||t |dS )z
        Get comprehensive load metrics for all DP ranks.

        Uses the communicator pattern to fetch real-time metrics,
        providing full parity with the HTTP /v1/loads endpoint.
        zReceive get loads requestallr!   N)includer!   z&Timeout waiting for scheduler responsezGetLoads failed: r   zFailed to get load metrics: c                 S   s   g | ]}t |qS rM   )rO   )rT   rrM   rM   rN   r     r   z4SGLangSchedulerServicer.GetLoads.<locals>.<listcomp>)	timestampversiondp_rank_countrP   	aggregate)!rq   r   r  r   HasFieldr!   ri   	get_loads
ValueErrorset_coder  
StatusCodeINVALID_ARGUMENTset_detailsr   r   GetLoadsResponser   TimeoutErrorDEADLINE_EXCEEDEDr   rz   r   INTERNALr   nowr   utc	isoformatr  r  ra   rg   )rs   ru   rv   r  r!   resultsr   rP   rM   rM   rN   GetLoads  s@   

z SGLangSchedulerServicer.GetLoadsgrpc_reqc                 C   s   | ds	td|jj}t|jj}| |j}|jdd d}d}d}| drB|j	j
r2|j	j
nd}|j	jr<|j	jnd}|j	j}t|j||d||j|jdurS|jnd|jpXd|jp\d|jrb|jnd|jrkt|jnd|||d	S )
z0Convert gRPC GenerateRequest to internal format.	tokenized Tokenized input must be providedNr   disaggregated_paramsr   r   F)r   r   r   r   r   r   r   r   r   lora_idr   r   bootstrap_portr   )r"  r$  r3  original_textr   r   _convert_sampling_paramsr   r   r5  r   r7  r   r   rx   r   r   r   r   r6  r   )rs   r2  r   r   r   r   r7  r   rM   rM   rN   r     sL   




z1SGLangSchedulerServicer._convert_generate_requestc                 C   sd   | ds	td|jj}t|jj}| |j}d|_|j	dd t
|j||dg it|j|dS )z-Convert gRPC EmbedRequest to internal format.r3  r4  r   Nr   r   r   )r"  r$  r3  r8  r   r   r9  r   r   r   r   rx   r   )rs   r2  r   r   r   rM   rM   rN   r   V  s   
z.SGLangSchedulerServicer._convert_embed_requestgrpc_paramsc                 C   s  d}d}d}d}| dr|j}n| dr|j}n| dr#|j}n| dr+|j}| dr5t|jnd}| dr?|jnd}| drI|jnd}|j	rSt
|j	nd}	|jr]t|jnd}
|jrgt|jnd}tdi d	|jd
|jd|jd|jd|jd|jd|jd|d|jd|
d|d|jd|jd|jd|d|d|d|d|jd|jd|d|	d|S )z/Convert gRPC SamplingParams to internal format.Nregexjson_schemaebnf_grammarstructural_tagcustom_paramsr   stream_intervalr   top_ptop_kmin_pfrequency_penaltypresence_penaltyrepetition_penaltymin_new_tokensstopstop_token_idsskip_special_tokensspaces_between_special_tokensno_stop_trimebnfrd   
ignore_eos
logit_biasrM   )r"  r;  r<  r=  r>  r   r?  r   r@  rO  r  rH  r   rI  r   r   rA  rB  rC  rD  rE  rF  rG  rJ  rK  rL  rd   rN  )rs   r:  r;  r<  r=  r>  r?  r   r@  rO  rH  rI  rM   rM   rN   r9  t  s   



	
z0SGLangSchedulerServicer._convert_sampling_paramslogprobs_datac           	      C   s|   |sdS | dg }| dg }| dg }| dg }g }|r6|r6t||D ]\}}|tj||d q'tj|||dS )zEConvert output logprobs dict to proto (no None values, plain floats).Ntoken_logprobs_valtoken_logprobs_idxtop_logprobs_valtop_logprobs_idxvalues	token_idstoken_logprobsrW  top_logprobs)r   zipappendr   TopLogProbsOutputLogProbs)	rs   rP  rQ  rR  rS  rT  top_logprobs_protoval_listidx_listrM   rM   rN   !_convert_output_logprobs_to_proto  s(   z9SGLangSchedulerServicer._convert_output_logprobs_to_protoc           
      C   s   |sdS | dg }| dg }| dg }| dg }dd |D }g }|r=|r=t||D ]\}}	|tj||	d q.tj|||d	S )
zYConvert input logprobs dict to proto (first token is None, wrapped in InputTokenLogProb).NrQ  rR  rS  rT  c                 S   s(   g | ]}|d u rt  nt j|dqS )N)r   )r   InputTokenLogProb)rT   xrM   rM   rN   r     s    
zLSGLangSchedulerServicer._convert_input_logprobs_to_proto.<locals>.<listcomp>rU  rX  )r   r[  r\  r   r]  InputLogProbs)
rs   rP  rQ  rR  rS  rT  token_logprobs_wrappedr_  r`  ra  rM   rM   rN    _convert_input_logprobs_to_proto  s.   
z8SGLangSchedulerServicer._convert_input_logprobs_to_protorx   r   c                 C   sv   | di }| | d}| | d}tj|tj| dg | dd| dd| dd||| d	dd
dS )z"Create a streaming chunk response.	meta_infooutput_logprobsinput_logprobsrW  r   r   completion_tokensr   index)rW  r   rk  r   ri  rj  rl  )rx   chunk)r   rb  rg  r   r   GenerateStreamChunk)rs   rx   r   rh  output_logprobs_protoinput_logprobs_protorM   rM   rN   r     s&   




z.SGLangSchedulerServicer._create_chunk_responsec                 C   s  | di }| d}d}|r)t|tr| d}n|}|dkr#d}n|dkr)d}i }t|trKd|v rK|d }t|trB||d< n	t|trK||d	< | | d
}	| | d}
tj|tj	d| dg || dd| dt
| dg | dd|	|
| ddd|dS )zCreate a completion response.rh  finish_reasonrH  typelengthr{   matchedmatched_token_idmatched_stop_strri  rj  rW  r   r   rk  r   rl  )
output_idsrq  r   rk  r   ri  rj  rl  r   NrM   )r   r   r  r   r   rb  rg  r   r   GenerateCompletera   )rs   rx   r   rh  finish_reason_datarq  finish_reason_typematched_stop_kwargsrt  ro  rp  rM   rM   rN   r     sR   








z3SGLangSchedulerServicer._create_completion_responsec                    s0   t d | jr| j  | j I dH  dS )zShutdown the service.zShutting down gRPC serviceN)rq   rr   rm   set_not_servingri   shutdown)rs   rM   rM   rN   r}  T  s
   

z SGLangSchedulerServicer.shutdownrR   )3__name__
__module____qualname____doc__r   r   r   r   r   rt   r   GenerateRequestr  aioServicerContextr   r   r   EmbedRequestr   r   HealthCheckRequestr   r   AbortRequestr   r   GetModelInfoRequestr   r   GetServerInfoRequestr  r  GetLoadsRequestr)  r1  r   r   r   r   r   r   r9  r^  rb  re  rg  r   r   r   r}  rM   rM   rM   rN   rh      s    

G
.
^

#
4
/
<

D

'

;rh   rj   rk   c                    s  d}| j dkrt| }|rtd| j d| j  td t| d\}}}t| }|du r|j	}t
|dd}t
|dd	pAd	}	|sP|	rPd
d t|	D }n|rX|	sXt|}	|r_t|nd}
| j|d| jpjd|dd|ddt
|ddt
|dd|dd|dg |dd	|dd|
|	pd	d}t| ||d}tjjtjddddgd }t||d!}t|| t|| |||d"}t|| tjj d# j!d$t"j#f}t"$|| | j d| j% }|&| |' I dH  td%|  t(j)t*| |fd&}|'  t+, }t+-   fd'd(}t.j/t.j0fD ]	}|1|| qzm 2 I dH  W td) |3 I dH  |4d*I dH  |5 rPtd+ |j6d*d, t7|D ]5\}}|5 rtd-| d. |8  |j6d/d, |5 rt9d0| d1 |:  |j6d2d, qTtd3 dS td) |3 I dH  |4d*I dH  |5 rtd+ |j6d*d, t7|D ]5\}}|5 rtd-| d. |8  |j6d/d, |5 rt9d0| d1 |:  |j6d2d, qtd3 w )4z;Start the standalone gRPC server with integrated scheduler.Nprefillz4Bootstrap server started for disaggregation mode on :z"Launching scheduler process(es)...)rj   id2labelr   r   c                 S   s   i | ]}|d | qS )LABEL_rM   )rT   irM   rM   rN   r     s    zserve_grpc.<locals>.<dictcomp>r   r&   i    r   i  r   Fr   r   r   r   r   r   r   )
model_namer   r   r   r   r   r   r   r   r   r   r   )rj   	port_argsbootstrap_server
   )max_workerszgrpc.max_send_message_length   zgrpc.max_receive_message_lengthr  options)ri   rl   )ri   rj   rk   rl   rm   SglangSchedulerzgrpc.health.v1.HealthzgRPC server listening on )targetargsc                      s   t d    d S )NzReceived shutdown signal)rq   rr   r   rM   
stop_eventrM   rN   signal_handler  s   
z"serve_grpc.<locals>.signal_handlerzShutting down gRPC serverg      @z&Waiting for warmup thread to finish...timeoutzTerminating scheduler process z...g       @zScheduler process z did not terminate, killing...g      ?z"All scheduler processes terminated);r   r   rq   rr   hostdisaggregation_bootstrap_portr   r   from_server_args	hf_configgetattrrangera   jsondumpsr   r   context_lengthr   r  r  serverr   ThreadPoolExecutorr   r   add_HealthServicer_to_serverrh   r   %add_SglangSchedulerServicer_to_serverr   
DESCRIPTORservices_by_name	full_namer   SERVICE_NAMEenable_server_reflectionportadd_insecure_portstart	threadingThread_wait_and_warmup_grpcr   get_running_loopEventsignalSIGTERMSIGINTadd_signal_handlerwaitr}  rH  is_alivejoin	enumerate	terminater   kill)rj   rk   r  rl   r  scheduler_procsmodel_configr  r  r   r   ri   r  rm   servicerSERVICE_NAMESlisten_addrwarmup_threadloopr  sigr  procrM   r  rN   
serve_grpc`  s   












	












r  c                 C   sF  zh| j  d| j }tj|ddgd}t|}d}d}tdD ]-}td zt	
 }|j|d	d
}d}W  n tyN }	 z
t|	}W Y d}	~	q!d}	~	ww |shd| }
t|
 |  tt  W dS |j}td |rtdnd}|rdt  t	jg dddt	jd|ddd}| jtjjkrt	jtdd|d< t	jd%i |}z0t|j |dd
}|r|d !dstd d}n|r|d jj"nd}
t#d|
  d}W n ty }	 zd|	 }
t|
 |  tt  W Y d}	~	W dS d}	~	ww t	j$dt  t	jg d d!dd"}z#|j%|dd
}|!ds,td d}nt#d|jj"  d}W n) tyb }	 zd|	 }
t|
 |  tt  W Y d}	~	W dS d}	~	ww |  |W S  ty }	 z,d#|	 d$t&  }
t|
 z|  W n
 ty   Y nw tt  W Y d}	~	dS d}	~	ww )&zKExecute warmup for gRPC server by checking health and sending test request.r  r  r  r  FNx   r      r  TzVgRPC server warmup failed: Could not connect to server after 120 seconds. Last error: z(Sending warmup request to gRPC server...   WARMUP_){   i  i     i7  iz  iY  zwarmup request)r   r8  r   )r   r   )rx   r3  r   r   r   )r   r   r5  iX  r   rz   z*gRPC warmup request completed successfullyzNo responsez$gRPC warmup request returned error: zgRPC warmup request failed: )r        ztest embedding)rx   r3  z#gRPC warmup failed with exception: r   rM   )'r  r  r  insecure_channelr   SglangSchedulerStubr  rn   r   r   r  r   r   r   rq   rz   closer   osgetpidr   rr   TokenizedInputr   r   r   r   r   DisaggregatedParamsr   r  r   r   r"  r~   r   r  r   r   )rj   grpc_urlchannelstubr   
last_errorr   ru   responser   	error_msgr   r   warmup_request_kwargswarmup_request	responsesrM   rM   rN   _execute_grpc_server_warmup  s   




	






r  rm   c                 C   s8   | j s
t| s	dS ntd |r|  td dS )z4Wait for gRPC server to be ready and execute warmup.Nz5Skipping gRPC server warmup (skip_server_warmup=True)z)The server is fired up and ready to roll!)skip_server_warmupr  rq   rr   set_serving)rj   rm   rM   rM   rN   r    s   
r  rR   )Jr  r   r  r  loggingr  r  r  rn   
concurrentr   r   r   typingr   r   r   r  google.protobuf.json_formatr   google.protobuf.struct_pb2r	   google.protobuf.timestamp_pb2r
   grpc_health.v1r   grpc_reflection.v1alphar   smg_grpc_protor   r   r  sglang.srt.configs.model_configr   sglang.srt.disaggregation.utilsr   r   $sglang.srt.grpc.grpc_request_managerr   sglang.srt.grpc.health_servicerr   "sglang.srt.grpc.scheduler_launcherr   "sglang.srt.managers.disagg_servicer   sglang.srt.managers.io_structr   r   r   #sglang.srt.sampling.sampling_paramsr   r   sglang.srt.server_argsr   sglang.srt.utilsr   sglang.utilsr   	getLoggerr~  rq   r   getenvr   r@   rO   r   r`   rg   SglangSchedulerServicerrh   r  r  r  rM   rM   rM   rN   <module>   s    

I
     Z
 " 