o
    پi                     @  s   d dl mZ d dlZd dlmZ d dlZd dlmZ d dlm	Z	m
Z
mZmZmZ d dlmZ d dlmZ d dlmZ eeZG d	d
 d
eZG dd deZG dd de	ZdS )    )annotationsN)
NamedTuple)envs)BaseDispatcherCombineInputCombineInputFormatDispatchOutputDispatchOutputFormat)DeepEPBuffer)
TopKOutput)
DeepEPModec                   @  (   e Zd ZU dZded< ed	ddZdS )
FuseEPDispatchOutputz#DeepEP low latency dispatch output.torch.Tensorhidden_statereturnr	   c                 C     t jS N)r	   	DEEPEP_LLself r   a/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/layers/moe/token_dispatcher/fuseep.pyformat      zFuseEPDispatchOutput.formatN)r   r	   __name__
__module____qualname____doc____annotations__propertyr   r   r   r   r   r      
   
 r   c                   @  r   )
FuseEPCombineInputz!DeepEP low latency combine input.r   r   r   r   c                 C  r   r   )r   r   r   r   r   r   r   &   r   zFuseEPCombineInput.formatN)r   r   r   r   r   r   r   r#   !   r"   r#   c                   @  sB   e Zd Zdddddejfd ddZd!ddZd"ddZdd ZdS )#NpuFuseEPDispatcherFNgrouptorch.distributed.ProcessGrouprouter_topkintpermute_fusionboolnum_expertsnum_local_expertshidden_sizeparams_dtypetorch.dtypedeepep_moder   c	           	      C  sF   || _ || _|| _|| _|| _|| _|| _|| _d| _t	j
 | _d S )N   )r%   r'   r)   r+   r,   r-   r.   r0   params_bytesr   .SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANKget num_max_dispatch_tokens_per_rank)	r   r%   r'   r)   r+   r,   r-   r.   r0   r   r   r   __init__,   s   zNpuFuseEPDispatcher.__init__hidden_statesr   topk_outputr   r   r   c                 K  sD   |   j||j|j|d |d |d |d | j| jd	\}}t|S )Ngmm1_permuted_weightgmm1_permuted_weight_scalegmm2_weightgmm2_weight_scale)topk_idxtopk_weightsr9   r:   r;   r<   r5   r+   )_get_bufferfused_deep_moetopk_idsr>   r5   r+   r   )r   r7   r8   kwargs_r   r   r   dispatchE   s   
zNpuFuseEPDispatcher.dispatchcombine_inputr   c                 K  s   d S r   r   )r   rE   rB   r   r   r   combineU   s   zNpuFuseEPDispatcher.combinec                 C  s(   t   t | j| j| j| j| j| jS r   )	r
    set_dispatch_mode_as_low_latencyget_deepep_bufferr%   r-   r2   r0   r5   r+   r   r   r   r   r?   X   s   zNpuFuseEPDispatcher._get_buffer)r%   r&   r'   r(   r)   r*   r+   r(   r,   r(   r-   r(   r.   r/   r0   r   )r7   r   r8   r   r   r   )rE   r   r   r   )	r   r   r   r   LOW_LATENCYr6   rD   rF   r?   r   r   r   r   r$   +   s    

r$   )
__future__r   loggingtypingr   torchsglang.srt.environr   +sglang.srt.layers.moe.token_dispatcher.baser   r   r   r   r	   -sglang.srt.layers.moe.token_dispatcher.deepepr
   sglang.srt.layers.moe.topkr   sglang.srt.layers.moe.utilsr   	getLoggerr   loggerr   r#   r$   r   r   r   r   <module>   s    


