o
    پi                     @  s   d dl mZ d dlmZ d dlmZ d dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZ G d	d
 d
ZeG dd dZeG dd dZdd ZdS )    )annotations)	dataclass)OptionalN)envs)get_moe_runner_backend)is_sbo_enabled)is_blackwellc                   @  s<   e Zd Zedd Zedd Zedd Zedd Zd	S )
SboFlagsc                 C  s"   t  ot  pt  ot  S N)r   r   is_flashinfer_cutedslis_deep_gemmr   cls r   a/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/batch_overlap/single_batch_overlap.py+enable_combine_down_gemm_two_stream_overlap   s   
z4SboFlags.enable_combine_down_gemm_two_stream_overlapc                 C  s   t  o|   otj  S r
   )r   )enable_dispatch_shared_one_stream_overlapr   3SGLANG_BLACKWELL_OVERLAP_SHARED_EXPERTS_OUTSIDE_SBOgetr   r   r   r   (enable_combine_shared_two_stream_overlap*   s
   
z1SboFlags.enable_combine_shared_two_stream_overlapc                 C  s   t  ot  S r
   )r   r   r   r   r   r   r   2   s   z2SboFlags.enable_dispatch_shared_one_stream_overlapc                 C  s   |   p|  S r
   )r   r   r   r   r   r   fuse_shared_experts_inside_sbo6   s   z'SboFlags.fuse_shared_experts_inside_sboN)__name__
__module____qualname__classmethodr   r   r   r   r   r   r   r   r	      s    



r	   c                   @  sV   e Zd ZU ded< ded< ded< dZded	< dZd
ed< dZded< dZded< dS )CombineOverlapArgsbooloverlapztorch.cuda.Streamstreamtorch.cuda.Event
wait_eventNzOptional[int]num_smszOptional[torch.Tensor]signal@   block_mr   	threshold)r   r   r   __annotations__r!   r"   r$   r%   r   r   r   r   r   >   s   
 r   c                   @  s&   e Zd ZU ded< ded< ded< dS )DownGemmOverlapArgsintr!   ztorch.Tensorr"   r   start_eventN)r   r   r   r&   r   r   r   r   r'   J   s   
 r'   c                 C  s&  t  st  sd d i fS | j}|j\}}}tjjddj}t	j
 r)t	j
 }nt r.dnd}|| }|d us:J tj }	td|||	d}
t|d}d }t  rt r`tj|tj|jd}nd	}||| d
 |  }tj|tj|jd}t||	|d}d|
_||
_||
_n|t|	dO }|
||fS )Ncuda)device       F)r   r!   r   r    )compute_num_sms)dtyper+   r#      )r"   r)   r!   T)record_event_after_down)r	   r   r   hidden_statesshapetorchr*   get_device_propertiesmulti_processor_countr   %SGLANG_DEEPEP_LL_COMBINE_SEND_NUM_SMSis_setr   r   Eventr   dictzerosuint32r+   int32r'   r   r"   r%   )dispatch_output
alt_streamr2   num_local_expertsnum_tokens_static
hidden_dimtotal_num_smscommunicate_num_smsr.   combine_wait_eventcombine_overlap_argsmeta_overlap_argsdown_gemm_overlap_argscombine_signalMIN_BLOCK_Mcombine_signal_sizer   r   r   compute_overlap_argsQ   sh   





rL   )
__future__r   dataclassesr   typingr   r4   sglang.srt.environr   sglang.srt.layers.moer   sglang.srt.layers.moe.utilsr   sglang.srt.utilsr   r	   r   r'   rL   r   r   r   r   <module>   s   "