o
    پi@                     @  s   d dl mZ d dlZd dlZd dlmZmZ d dlmZm	Z	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZ erTd d	lmZ d d
lmZ d dlmZmZ d dlmZ eeZG dd dZdS )    )annotationsN)TYPE_CHECKINGOptional)FusedOpPoolMoeRunnerConfigPermuteMethodPool)DeepGemmRunnerCore)TritonRunnerCore)TritonKernelsRunnerCore)get_moe_a2a_backend)DownGemmOverlapArgs)MoeQuantInfo)CombineInputDispatchOutput)MoeRunnerBackendc                   @  s4   e Zd ZdddZdddZdddZdddZdS )	MoeRunnerrunner_backendr   configr   c                 C  s   || _ || _d | _| rt|| _n+| rt|| _n!| r't	|| _n|
 r/d | _n| r7d | _ntd| t j}|j}t||| _| jd u ra| jd u ratd| d| dd | _d | _tjdd}|dkr|td d | _d S d S )	NzUnsupported runner backend: zRunner backend z' requires a fused func for a2a backend z, but none is registered. SGLANG_CI_DISABLE_MOE_FUSED_FUNC01zBSGLANG_CI_DISABLE_MOE_FUSED_FUNC is set to 1, disabling fused func)r   r   
fused_func	is_tritonr	   runner_coreis_triton_kernelsr
   is_deep_gemmr   	is_marlinis_flashinfer_trtllmNotImplementedErrorr   valuer   get_fused_funcdown_gemm_overlap_argsmeta_overlap_argsosenvirongetloggerinfo)selfr   r   a2a_backend_namerunner_backend_namer    r+   [/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/layers/moe/moe_runner/runner.py__init__   sH   
zMoeRunner.__init__dispatch_outputr   
quant_infor   returnr   c           
      C  s   | j d ur|  ||| jS | jd usJ |jj}| jjj}t||| _i }| j	d ur0| j	|d< | j
d ur:| j
|d< | ||| j|}| j|||}| jjj}|jj}t||| _| ||| j|}	|	S )Nr!   r"   )r   r   r   formatr   r   r   get_pre_permutepre_permute_funcr!   r"   runget_post_permutepost_permute_func)
r(   r.   r/   dispatch_formatrunner_formatrunning_staterunner_inputrunner_outputcombine_formatcombine_inputr+   r+   r,   r4   I   s4   








zMoeRunner.runr!   r   r"   dictc                 C  s"   | j d u s	J d|| _|| _d S Nz,Fused func is not supported for overlap argsr   r!   r"   )r(   r!   r"   r+   r+   r,   set_overlap_argsm   s   
zMoeRunner.set_overlap_argsNonec                 C  s"   | j d u s	J dd | _d | _d S r?   r@   )r(   r+   r+   r,   clear_overlap_argst   s   
zMoeRunner.clear_overlap_argsN)r   r   r   r   )r.   r   r/   r   r0   r   )r!   r   r"   r>   )r0   rB   )__name__
__module____qualname__r-   r4   rA   rC   r+   r+   r+   r,   r      s
    

-
$r   )
__future__r   loggingr#   typingr   r   %sglang.srt.layers.moe.moe_runner.baser   r   r   *sglang.srt.layers.moe.moe_runner.deep_gemmr   'sglang.srt.layers.moe.moe_runner.tritonr	   /sglang.srt.layers.moe.moe_runner.triton_kernelsr
   sglang.srt.layers.moe.utilsr   -sglang.srt.batch_overlap.single_batch_overlapr   r   +sglang.srt.layers.moe.token_dispatcher.baser   r   r   	getLoggerrD   r&   r   r+   r+   r+   r,   <module>   s     
