o
    پi                     @  sv  d dl mZ d dlmZmZmZ d dlZd dlmZm	Z	m
Z
 d dlmZ d dlmZmZmZ d dlmZ d dlmZmZmZmZmZ d d	lmZmZmZ d d
lmZmZ d dl m!Z!m"Z"m#Z# e" Z$e!dofe$Z%erod dlmZ ze# rzd dl&m'Z' nd dl(m)Z' d dl&m'Z* W n e+y   dZ'Y nw G dd deZ,e-e,esJ G dd deZ.e-e.esJ G dd deZ/dS )    )annotations)TYPE_CHECKING
NamedTupleOptionalN)get_moe_expert_parallel_rank"get_moe_expert_parallel_world_sizeget_tp_group)use_symmetric_memory)get_dp_global_num_tokensget_local_dp_bufferis_allocation_symmetric)MoeRunnerConfig)BaseDispatcherCombineInputCombineInputFormatDispatchOutputDispatchOutputFormat)StandardTopKOutput
TopKOutputTopKOutputChecker)get_moe_runner_backend/should_use_flashinfer_cutlass_moe_fp4_allgather)get_bool_env_varis_hipis_sm120_supportedSGLANG_USE_AITER)r   )fp4_quantize)scaled_fp4_quantc                   @  s8   e Zd ZU dZded< ded< ded< edd
dZdS )StandardDispatchOutputzStandard dispatch output.torch.Tensorhidden_stateszOptional[torch.Tensor]hidden_states_scaler   topk_outputreturnr   c                 C     t jS N)r   STANDARDself r)   c/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/layers/moe/token_dispatcher/standard.pyformat<      zStandardDispatchOutput.formatN)r#   r   __name__
__module____qualname____doc____annotations__propertyr+   r)   r)   r)   r*   r   5   s   
 r   c                   @  s(   e Zd ZU dZded< ed	ddZdS )
StandardCombineInputzStandard combine input.r   r    r#   r   c                 C  r$   r%   )r   r&   r'   r)   r)   r*   r+   I   r,   zStandardCombineInput.formatN)r#   r   r-   r)   r)   r)   r*   r4   D   s
   
 r4   c                      s2   e Zd Zd fddZdddZdddZ  ZS )StandardDispatchermoe_runner_configr   c                   sN   t    t | _t  | _|j| _|j| _	|j
| j	 | _t | _d | _d S r%   )super__init__r   moe_ep_sizer   is_flashinfer_cutlassenable_flashinfer_cutlass_moenum_expertsnum_fused_shared_expertsnum_local_shared_expertsnum_local_expertsnum_local_routed_expertsr   moe_ep_ranklocal_expert_mapping)r(   r6   	__class__r)   r*   r8   S   s   


zStandardDispatcher.__init__r    r   r"   r   r#   r   c                 C  s  t  rddlm} | jdd }|d usJ d|j|j}}tt t	  d8 |j
d dkr:t||dd\}}n|j
d }	tjd|	d	 tj|jd
}tjd|	d tj|jd
}W d    n1 scw   Y  t j||||gt d\}}}}||}|}|}
t|||jd}n|}d }
| jdkr| jst|r| jd u rtj| jfdtjdd
| _tjd| jtjdd
| j| j| j | jd | j < | jdkrtj| j| j| j tjdd
| j| j d < | jd urt st|r|j!| j|j d}n	t"|rt# t$||
|dS )Nr   )nvfp4_block_scale_interleaveinput_global_scalezinput_global_scale is not set)disabledF)is_sf_swizzled_layout      )dtypedevice   )sizes)topk_weightstopk_idsrouter_logitscudacpu)rP   )r    r!   r"   )%r   
flashinferrE   quant_configgetrO   rP   r	   r   r   shapefp4_quantize_flashinfertorchzerosuint8rL   all_gathervr
   r   rQ   r9   r;   r   format_is_standardrB   fullr<   int32aranger@   rA   r>   
_use_aiter_replaceformat_is_triton_kernelsNotImplementedErrorr   )r(   r    r"   rE   global_scalerO   rP   xx_sfx_colr!   r)   r)   r*   dispatcha   s   







zStandardDispatcher.dispatchcombine_inputr4   c                 C  s0   |\}t  rt |}}t j||t d |S )N)outputrN   )r   r   r   reduce_scattervr
   )r(   rk   r    global_hidden_statesr)   r)   r*   combine   s   zStandardDispatcher.combine)r6   r   )r    r   r"   r   r#   r   )rk   r4   r#   r   )r.   r/   r0   r8   rj   ro   __classcell__r)   r)   rC   r*   r5   Q   s    
Wr5   )0
__future__r   typingr   r   r   rZ   sglang.srt.distributedr   r   r   <sglang.srt.distributed.device_communicators.pynccl_allocatorr	   sglang.srt.layers.dp_attentionr
   r   r   %sglang.srt.layers.moe.moe_runner.baser   +sglang.srt.layers.moe.token_dispatcher.baser   r   r   r   r   sglang.srt.layers.moe.topkr   r   r   sglang.srt.layers.moe.utilsr   r   sglang.srt.utils.commonr   r   r   _is_hiprb   rU   r   
sgl_kernelr   rY   ImportErrorr   
isinstancer4   r5   r)   r)   r)   r*   <module>   s8    
