o
    پiC                     @  sr  d dl mZ d dlZd dlZd dlZd dlmZ d dlmZmZm	Z	m
Z
mZ d dlZd dlmZmZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZ d d
lmZ d dlmZmZmZm Z  d dl!m"Z"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z)m*Z*m+Z+ d dl,m-Z- d dl.m/Z/ d dl0m1Z1m2Z2m3Z3m4Z4 erd dl5m6Z6 d dl!m7Z7 d dl8m9Z9 e4 Z:e3dZ;e<e=Z>	dudvddZ?dwd$d%Z@dxd(d)ZAdyd+d,ZBdyd-d.ZCdyd/d0ZD	dudzd7d8ZEd{d;d<ZFd|dAdBZGd}dFdGZHd~dIdJZIG dKdL dLZJG dMdN dNZKG dOdP dPZLddQdRZM	dudd^d_ZNddcddZOddedfZPddhdiZQddjdkZRddodpZSdqdr ZTG dsdt dte%ZUdS )    )annotationsN)replace)TYPE_CHECKINGDictListOptionalSequence)execute_operationsexecute_overlapped_operations)OperationsStrategy)deep_gemm_wrapper)AttentionBackend)CommunicateContextCommunicateSummableTensorPairFnScatterMode)get_attention_tp_size)get_deepep_modeget_moe_a2a_backend$get_tbo_token_distribution_thresholdis_tbo_enabled)DeepEPDispatcherMooncakeEPDispatcher)BaseDispatcher)ScheduleBatch)ForwardBatchForwardModecompute_position)get_global_server_args)	SpecInput)BumpAllocatorempty_contextget_bool_env_varis_hip)CombineOverlapArgs)DispatchOutput)EagleVerifyInputSGLANG_TBO_DEBUGforward_moder   	spec_infoOptional[SpecInput]c                 C  s*   |   r|jS |  rdS |  rdS d S )N   r   )is_target_verifydraft_token_num	is_decodeis_idler'   r(    r0   ^/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/batch_overlap/two_batch_overlap.pyget_token_num_per_seq<   s   r2   
num_tokensintextend_lensOptional[Sequence[int]]token_num_per_seqOptional[int]returnc                 C  sl   | t jkr|d usJ t|S |  s|  r#|d usJ || d S |  s+|  r3|dks1J dS t N   r   )r   EXTEND_split_extend_seqsr+   r-   r.   is_prebuiltNotImplementedErrorr'   r3   r5   r7   r0   r0   r1   compute_split_seq_indexL   s   
rA   Sequence[int]boolc                 C  sd   | d u rdS t | }t| d | }t| }t }|dks$J d|||| k p1||d|  kS )NFg      ?z
threshold=r*   )_split_array_by_balanced_sumsumr   )r5   vanilla_split_seq_indexleft_sumoverall_sum	thresholdr0   r0   r1   _is_two_chunk_split_enabled_   s   rJ   arrc                 C  s   t | rt| S t| S N)rJ   "_split_array_by_cum_less_than_halfrD   )rK   r0   r0   r1   r=   m   s   r=   c                 C  sL   d}t | }|d }d}tt| D ]}|| | 7 }||kr#|} |S q|S )Nr   r;   )rE   rangelen)rK   rG   rH   half_sumchosen_indexir0   r0   r1   rM   t   s   rM   c                 C  sj   t | }d}td}d}tdt| D ]}|| |d  7 }|| }t|| }||kr0|}|}q |S |S )Nr   infr*   )rE   floatrN   rO   abs)rK   rH   rG   min_diff
best_indexrR   	right_sumdiffr0   r0   r1   rD      s   rD   batchr   	cpu_fieldstrdevice_field	sum_fieldc                 C  s   t | |d }t | |d }|d u s|d u st|tjs!t|ts!d S t|tjr)|ntj||jdjt j	dd}t
| || |d urZt|tjrN|  nt|}t
| || d S d S )NdtypeTdevicenon_blocking)getattr
isinstancetorchTensorlisttensorr`   tor   rb   setattrrE   item)rZ   r[   r]   r^   	cpu_valueold_device_valuenew_device_value	sum_valuer0   r0   r1   +_update_device_and_sum_field_from_cpu_field   s,   



rq   	seq_indexOptional[EagleVerifyInput]c                 C  sN   | dkrdS d}t | |jjd }t|D ]}||j| |j |j 7 }q|S Nr   )minseq_lens_cpushaperN   r,   )rr   r(   offsetmax_seq_lenrR   r0   r0   r1   _compute_mask_offset   s   rz   start_seq_indexend_seq_indexstart_token_indexend_token_indexc                 C  s  | d u rd S | j d ur| j || }nd }| jd urH| j d urHt|| }|| jjd kr3| jjd }nt|| }||krD| j|| }n| j}n| j}| jd urX| j|| }	nd }	| jd urg| j|| }
nd }
| jd urv| j|| }nd }| jd ur| j|| }nd }| j	d ur| j	|| }nd }| jd ur| j|| }nd }|d ur|
 }nd }t| |||	|
|||||d
}|S )Nr   )	custom_maskdraft_token	positionsretrive_indexretrive_next_tokenretrive_next_siblingretrive_cum_lenrv   seq_lens_sum)r   r   rz   rv   rw   r   r   r   r   r   rE   r   )r(   r{   r|   r}   r~   r   custom_mask_startcustom_mask_endr   r   r   r   r   r   rv   r   output_spec_infor0   r0   r1   split_spec_info   sd   









r   split_seq_index'ForwardMode'extend_seq_lensc                 C  sz   |t jkr|d usJ t|rt|d S t|d |  S | s%| r/|d us+J | | S | r;| dks9J dS tr:   )r   r<   rJ   rE   r+   r-   r.   r?   r   r'   r   r7   r0   r0   r1   compute_split_token_index  s   
r   cuda_graph_num_tokensc                 C  sH   | t jkr| nt j}t| |d}t||d |d}t||d |d}||fS )Nr/   r@   r   )r   IDLEDECODEr2   rA   r   )r'   r   r(   forward_mode_for_tbo_splitr7   tbo_split_seq_indextbo_split_token_indexr0   r0   r1   +compute_split_indices_for_cuda_graph_replay  s$   r   c                   @  s(   e Zd Zdd ZdddZdddZdS )TboCudaGraphRunnerPluginc                 C  s   t jdt jd| _d S )N)r;   r_   )rf   zerosint32"_tbo_children_num_token_non_padded)selfr0   r0   r1   __init__:  s   z!TboCudaGraphRunnerPlugin.__init__rZ   r   r3   r4   c                 C  sj   t  sd S t|j|jd}t|j|d |d|_|jd us#J d|t|| jd< tj	|| jd d S )Nr/   r@   znum_tokens=.!tbo_children_num_token_non_padded)
r   r2   r'   r(   rA   r   TboForwardBatchPreparer)compute_tbo_children_num_token_non_paddedr   prepare_raw)r   rZ   r3   r7   r0   r0   r1   capture_one_batch_size=  s$   
z/TboCudaGraphRunnerPlugin.capture_one_batch_sizer'   r   bsnum_token_non_paddedr(   r)   c                 C  s:   t ||d}t||| |d\}}tj||d| jd< d S )Nr/   )r'   r   r(   r   r   .)r2   r   r   -compute_tbo_children_num_token_non_padded_rawr   )r   r'   r   r   r(   r7   r   r   r0   r0   r1   replay_prepareV  s   	z'TboCudaGraphRunnerPlugin.replay_prepareN)rZ   r   r3   r4   )r'   r   r   r4   r   r4   r(   r)   )__name__
__module____qualname__r   r   r   r0   r0   r0   r1   r   9  s    
r   c                   @  sB   e Zd ZdddZdd Zedd Zed	d
 Zedd ZdS )TboDPAttentionPreparerlocal_batchr   c           
      C  s   t  }t   }t }|| _|d ur`t|j|jd}|j s%|j	 r,|
 | }n|j r4d}n|j}t|j||j|d| _||j}| jd uo^|j oW|j  o]|o]|  }nd| _d}| |}	||	fS )Nr/   r   r@   T)r   r   is_noner   enable_two_batch_overlapr2   r'   r(   r+   r-   
batch_sizer>   extend_num_tokensrA   r5   local_tbo_split_seq_indexresolveis_extend_in_batch	is_extendis_low_latency_compute_local_forward_mode)
r   r   deepep_modeenable_a2a_moer   r7   r3   resolved_deepep_modelocal_can_run_tbolocal_forward_moder0   r0   r1   prepare_all_gatherq  sF   



	
z)TboDPAttentionPreparer.prepare_all_gatherc                 C  sj   t |d d df  }|d d df  }| |\}}| jo#|o#|}|r)| jnd }|r/|nd }||fS )Nr   r*   )ru   tolist_compute_global_forward_moder   r   )r   partial_global_infolocal_can_run_tbo_aggregatedforward_modesglobal_forward_modeforward_mode_agreecan_run_tbor   r0   r0   r1   compute_output  s   z%TboDPAttentionPreparer.compute_outputc                 C  s   | d ur| j jS tjjS rL   )r'   r   r   value)r   r0   r0   r1   r     s   z2TboDPAttentionPreparer._compute_local_forward_modec                 C  sB   dd | D }|st jdfS t|}|rt |d nd }||fS )Nc                 S  s(   g | ]}|t jjkr|t jjkr|qS r0   )r   r   r   PREBUILT).0xr0   r0   r1   
<listcomp>  s
    zGTboDPAttentionPreparer._compute_global_forward_mode.<locals>.<listcomp>Fr   )r   r   r   _is_all_same)r   )forward_modes_excluding_idle_and_prebuiltr   r   r0   r0   r1   r     s   
z3TboDPAttentionPreparer._compute_global_forward_modec                   s   t  fdd D S )Nc                 3  s    | ]	}| d  kV  qdS )r   Nr0   )r   r   r   r0   r1   	<genexpr>  s    z6TboDPAttentionPreparer._is_all_same.<locals>.<genexpr>)allr   r0   r   r1   r     s   z#TboDPAttentionPreparer._is_all_sameN)r   r   )	r   r   r   r   r   staticmethodr   r   r   r0   r0   r0   r1   r   p  s    
0

r   c                   @  sp   e Zd Zed$d%ddZed&d
dZed'ddZed(ddZed)ddZed*dd Z	ed)d!d"Z
d#S )+r   FrZ   r   is_draft_workerrC   c                 C  s.   |j d u s|r	d S | |}| j||d d S )Nr   )r   r   r   )clsrZ   r   r   r0   r0   r1   prepare  s   
zTboForwardBatchPreparer.preparer   torch.Tensorc                 C  s
  ddl m} | |}|jtjkot|j}tr3t	
d| d|j d| d|j d|j d|j  t|j|s;J |jj\}}|\}}	| j|d|d|rR|jd	 n|j||d
}
| j|||jjd |j|j||	d
}|rw| j||
||jd |jd u s~J |
|g|_d S )Nr   )TboAttnBackendz4TboForwardBatchPreparer.prepare is_enable_two_chunk=z tbo_split_seq_index=z tbo_split_token_index=z extend_seq_lens=z bs=z forward_mode=r*   )r}   r~   r{   r|   output_attn_backendout_num_token_non_padded)child_achild_br   )'sglang.srt.layers.attention.tbo_backendr   _compute_split_token_indexr'   r   r<   rJ   extend_seq_lens_cpu
_tbo_debugloggerinfor   r   re   attn_backendchildrenfilter_batch	input_idsrw   .derive_fields_related_to_seq_len_for_two_chunktbo_children)r   rZ   r   r   r   is_enable_two_chunkattn_backend_child_aattn_backend_child_bout_num_token_non_padded_aout_num_token_non_padded_br   r   r0   r0   r1   r     sl   



z#TboForwardBatchPreparer.prepare_rawr   r   r   r4   c                C  s.  |j }t|}|d }|t|d |  }|| | }	t|j |_ ||j d< t|j |_ |	|j d< ||fD ]
}
t|
dddd q7|j|ksRJ d|jd	|t|j|_|j d |jd  |jd< t|d
ddd t|j|_|jd  |7  < t|ddd d tt	 j
|j|j|j\}|_d S )Nr;   r   r   r   r   )rZ   r[   r]   r^   zchild_a.extend_num_tokens=z, half_seq_lens_sum=rv   seq_lensr   extend_prefix_lens_cpuextend_prefix_lens)r   rE   copydeepcopyrq   r   rv   r   r   r   attention_backendr   r   extend_start_loc)r   rZ   r   r   r   r   overall_seq_lens_sumhalf_seq_lens_sumleft_last_seq_token_numright_first_seq_token_numchild_r0   r0   r1   r   "  sZ   	



zFTboForwardBatchPreparer.derive_fields_related_to_seq_len_for_two_chunkr}   r~   r{   r|   r   r   r   c             
   C  sF  ||ksJ d|d|d| |j jd }|j}	t }
dD ]&}t||}|jd |ks=J d|d|d|d	|||| |
|< qt }|| d
 | d
 | |
d< dD ]L}t||}|d u reqY|j r|dks|dks|dks|dks|dks|dkrd |
|< qYt||	ksJ d|d|d|	d	|||| |
|< qYt|d}t	|||||d}||
d< dD ]	}t|||
|< q|j st
|j |j|jksJ d|t
|
d |
d }t jd
kr|jd ur|| }|}nd }|
td:i d|| dd|
v r|
d  nd d|d|d|d d d!d d"||fd#d d$d d%d d&d d'|d(d d)d d*d d+d,d-d d.d,d/d d0d d1d d2d d3d d4d, g }ttD ]&}t||jd ur|j|
vr|d5|j d6t||j d	| d7 q`t|dkrtt| d8d9| td:i |
S );Nzend_token_index=z, start_token_index=z, batch=r   )r   r   out_cache_loczkey=z old_value=z num_tokens=z batch=r*   tbo_padded_len)
req_pool_indicesr   rv   r   r   r   r   r   extend_logprob_start_lens_cpulora_idsr   r   r   r   r   r   z
 num_seqs=r(   )r(   r}   r~   r{   r|   )r'   r   return_logprobreq_to_token_pooltoken_to_kv_poolcan_run_dp_cuda_graphdp_padding_moder   is_prefill_onlyspec_algorithmcapture_hidden_modepadded_static_lenmrope_positionssplit_indexorig_seq_lenszbatch=r   r'   r   r   rv   r   r   r   num_token_non_padded_cpur   tbo_parent_token_ranger   original_global_num_tokens_cpuglobal_num_tokens_gpuglobal_num_tokens_cpuglobal_dp_buffer_len!global_num_tokens_for_logprob_gpu!global_num_tokens_for_logprob_cpusampling_infotemp_scaled_logprobsFtemperaturetop_p_normalized_logprobstop_p	mm_inputstop_logprobs_numstoken_ids_logprobsnext_token_logits_buffer return_hidden_states_before_normzField z, has value, but is not yet supported (value=)z errors happen:
z

r0   )r   rw   r   dictrd   r   r'   r+   rO   r   _compute_extend_num_tokensr   r   moe_dense_tp_sizer  updaterE   dataclassesfieldsr   nameappend	Exceptionjoin)r   rZ   r}   r~   r{   r|   r   r   r3   num_seqsoutput_dictkey	old_valueattention_tp_sizer(   r   r   sum_lenr  errorsfieldr0   r0   r1   r   `  s   







	$ z$TboForwardBatchPreparer.filter_batchc                 C  s   | j | |t|jdS )Nr   )r   r   rO   r   )r   rZ   r0   r0   r1   r     s   zATboForwardBatchPreparer.compute_tbo_children_num_token_non_paddedr   r   c                 C  s:   t ||}td|| }tj||gtjdjt jddS )Nr   r_   Tra   )ru   maxrf   ri   r   rj   r   rb   )r   r   r   value_avalue_br0   r0   r1   r     s
   
zETboForwardBatchPreparer.compute_tbo_children_num_token_non_padded_rawc                 C  s&   t |j|jd}t|j|j|j|dS )Nr/   r   )r2   r'   r(   r   r   r   )r   rZ   r7   r0   r0   r1   r     s   z2TboForwardBatchPreparer._compute_split_token_indexN)F)rZ   r   r   rC   )rZ   r   r   r   )rZ   r   r   r   r   r   r   r4   )rZ   r   r}   r4   r~   r4   r{   r4   r|   r4   r   r   r   r   )rZ   r   )r   r4   r   r4   )r   r   r   classmethodr   r   r   r   r   r   r   r0   r0   r0   r1   r     s     @= 
r   c                 C  s2   |  s| s| rd S | r| jd S trt   )r-   r.   r+   r   rw   r?   )r   r'   r0   r0   r1   r    s   
r  
enable_tbor   r   forward_batchhidden_statesinput_data_scatter_moder   residualOptional[torch.Tensor]zero_allocatorOptional[BumpAllocator]c                 C  sJ   t |||||d}| d jj}	t| |j}
|r t||
||	dS t||
S )N)r   r5  r4  r7  r9  r   )inputsoperations_strategyr6  layer_input_scatter_mode)r  layer_scatter_modeslayer_input_moder   init_new_tbor   _model_forward_tbo_model_forward_non_tbo)layersr3  r   r4  r5  r6  r7  r9  r;  r=  r<  r0   r0   r1   model_forward_maybe_tbo-  s&   

rD  r<  r   r=  c                 C  s   t di | ||d}| d jd }~ trt nt|j}| t||jgd d|j	gd}W d    n1 s:w   Y  t
g ||R  S )N)r6  r=  r5  r   r;   )
inputs_arroperations_arrdelta_stagesr0   )_model_forward_tbo_split_inputsrw   _is_hipr    r   configure_deep_gemm_num_smsdeep_gemm_num_smsr
   
operationstbo_delta_stages _model_forward_tbo_merge_outputs)r;  r<  r6  r=  rE  original_hidden_states_lencontextoutputs_arrr0   r0   r1   rA  M  s,   

rA  c                 C  s   t | |j}|d |d fS )Nr5  r7  )r	   rL  )r;  r<  outputsr0   r0   r1   rB  m  s   rB  
List[Dict]c              	     s^   t jt tj||| ||d\} }t| ||||d}fdd  fdd|D S )Nhidden_states_input_moderesidual_input_modeoutput_moder5  r7  r4  rP  r5  r7  r   r4  r9  c              	     s2   t j| || d\} }td| ||d|S )NrT  )r5  r7  r4  r0   )r   executer  )r5  r7  r4  kwargs)rP  r=  tbo_splitter_scatter_moder0   r1   _post_transform  s    
	z8_model_forward_tbo_split_inputs.<locals>._post_transformc                   s   g | ]	} d i |qS r0   r0   )r   r;  )r\  r0   r1   r     s    z3_model_forward_tbo_split_inputs.<locals>.<listcomp>)r   TP_ATTN_FULLr   init_newr   rY  #_model_forward_tbo_split_inputs_raw)r5  r7  r   r4  r9  r6  r=  rE  r0   )r\  rP  r=  r[  r1   rH  r  s(   	

rH  c                   s    fddt |jD S )Nc                   sB   g | ]\}}t di t ||d durt dni qS ))r5  r7  r   output_forward_batchtbo_subbatch_indexN)r9  r0   )r  _model_forward_filter_inputs)r   rb  ra  r5  r   r7  r9  r0   r1   r     s     

z7_model_forward_tbo_split_inputs_raw.<locals>.<listcomp>)	enumerater   rX  r0   rd  r1   r`    s
   r`  ra  rb  r   c                   sl   t |j }| | } |d u rd n|| }|| }|jd usJ |j  fdd}t|| ||||||dS )Nc                   sZ   | d u rd S | j d  kr| S tj g| j dd  R | j| jd}| |d | j d < |S )Nr   r*   r`   rb   )rw   rf   r   r`   rb   )r   res
padded_lenr0   r1   _pad  s   &z*_model_forward_filter_inputs.<locals>._pad)r5  r7  r   r4  rb  )slicer  r   r  )r5  r7  r   ra  rb  token_slicerj  r0   rh  r1   rc    s   

rc  c                   s     fdd}|d|dfS )Nc                   s   |  }|  }|d u |d u ksJ |d u rd S d j \}}d j \}}tj g|jdd  R |j|jd}|d ||  |t||< |d ||  |t||< |S )Nr4  r*   rf  )r  rf   r   rw   r`   rb   rk  )r#  r0  r1  s0t0s1t1rg  original_lenoutput_aoutput_br0   r1   _handle_key  s   z5_model_forward_tbo_merge_outputs.<locals>._handle_keyr5  r7  r0   )rs  rt  rr  ru  r0   rq  r1   rN    s   rN  c                      s   e Zd Z fddZd$d%ddZd&d
dZdd Zdd Zd'ddZdd Z	dd Z
dd Zd( fddZd) fd d!Z fd"d#Z  ZS )*MaybeTboDeepEPDispatcherc                   sh   t    t r
dnd}t  r fddt|D | _d S t  r2 fddt|D | _d S d S )Nr;   r*   c                      g | ]	}t d i  qS r]  )r   r   r   rZ  r0   r1   r         z5MaybeTboDeepEPDispatcher.__init__.<locals>.<listcomp>c                   rw  r]  )r   rx  ry  r0   r1   r     rz  )superr   r   r   	is_deepeprN   _innersis_mooncake)r   rZ  num_inner_dispatchers	__class__ry  r1   r     s   




z!MaybeTboDeepEPDispatcher.__init__Nrb  r8   c                 K  s   t | j|pd |di |S )Nr   r0   )rd   r}  )r   r#  rb  rZ  r0   r0   r1   _execute  s   z!MaybeTboDeepEPDispatcher._executer9   r$   c                 K     | j di |S )Ndispatch)r  r  r   rZ  r0   r0   r1   r  
     z!MaybeTboDeepEPDispatcher.dispatchc                 K  r  )N
dispatch_a)r  r  r  r0   r0   r1   r    r  z#MaybeTboDeepEPDispatcher.dispatch_ac                 K  r  )N
dispatch_b)r  r  r  r0   r0   r1   r    r  z#MaybeTboDeepEPDispatcher.dispatch_br   c                 K  r  )Ncombine)r  r  r  r0   r0   r1   r    r  z MaybeTboDeepEPDispatcher.combinec                 K  r  )N	combine_a)r  r  r  r0   r0   r1   r    r  z"MaybeTboDeepEPDispatcher.combine_ac                 K  r  )N	combine_b)r  r  r  r0   r0   r1   r    r  z"MaybeTboDeepEPDispatcher.combine_bc                 C  s$   g }| j D ]
}||| q|S rL   )r}  r$  register_deepep_dispatch_hook)r   hookhandle_listinnerr0   r0   r1   r    s   
z6MaybeTboDeepEPDispatcher.register_deepep_dispatch_hookquant_configr  c                   s&   t  | | jD ]}|| q	d S rL   )r{  set_quant_configr}  )r   r  r  r  r0   r1   r  "  s   
z)MaybeTboDeepEPDispatcher.set_quant_configcombine_overlap_argsr#   meta_overlap_argsc                   s*   t  || | jD ]}||| q
d S rL   )r{  set_overlap_argsr}  )r   r  r  r  r  r0   r1   r  '  s   
z)MaybeTboDeepEPDispatcher.set_overlap_argsc                   s"   t    | jD ]}|  qd S rL   )r{  clear_overlap_argsr}  )r   r  r  r0   r1   r  .  s   


z+MaybeTboDeepEPDispatcher.clear_overlap_argsrL   )rb  r8   )r9   r$   )r9   r   )r  r  )r  r#   r  r  )r   r   r   r   r  r  r  r  r  r  r  r  r  r  r  __classcell__r0   r0   r  r1   rv    s    

rv  rL   )r'   r   r(   r)   )
r'   r   r3   r4   r5   r6   r7   r8   r9   r8   )r5   rB   r9   rC   )rK   rB   r9   r4   )rZ   r   r[   r\   r]   r\   r^   r\   )rr   r4   r(   rs   r9   r4   )
r(   rs   r{   r4   r|   r4   r}   r4   r~   r4   )
r   r4   r'   r   r   r6   r7   r8   r9   r4   )r'   r   r   r4   r(   r)   )r'   r   )r3  rC   r   r   r4  r   r5  r   r6  r   r7  r8  r9  r:  )r<  r   r6  r   r=  r   )r<  r   )r5  r   r7  r   r   r   r4  r   r9  r:  r6  r   r=  r   r9   rS  )r5  r   r7  r   r   r   r4  r   r9  r:  r9   rS  )r5  r   r7  r   r   r   ra  r   rb  r4   r9   r   )V
__future__r   r   r!  loggingr   typingr   r   r   r   r   rf   #sglang.srt.batch_overlap.operationsr	   r
   ,sglang.srt.batch_overlap.operations_strategyr   sglang.srt.layersr   -sglang.srt.layers.attention.base_attn_backendr   sglang.srt.layers.communicatorr   r   r   sglang.srt.layers.dp_attentionr   sglang.srt.layers.moer   r   r   r   &sglang.srt.layers.moe.token_dispatcherr   r   +sglang.srt.layers.moe.token_dispatcher.baser   "sglang.srt.managers.schedule_batchr   ,sglang.srt.model_executor.forward_batch_infor   r   r   sglang.srt.server_argsr    sglang.srt.speculative.spec_infor   sglang.srt.utilsr   r    r!   r"   -sglang.srt.batch_overlap.single_batch_overlapr#   r$   !sglang.srt.speculative.eagle_infor%   rI  r   	getLoggerr   r   r2   rA   rJ   r=   rM   rD   rq   rz   r   r   r   r   r   r   r  rD  rA  rB  rH  r`  rc  rN  rv  r0   r0   r0   r1   <module>   sp    








H
7d  
L
 
 

2
"