o
    پi_.                     @  s  d dl mZ d dlZd dlmZmZ d dlmZ d dlm	Z	m
Z
mZmZmZmZmZmZmZmZ d dlZe	rVd dlmZ d dlmZmZmZmZmZmZmZmZ d dlm Z  G d	d
 d
Z!G dd dZ"G dd de"Z#G dd de"Z$G dd de"Z%G dd de"Z&G dd dZ'G dd deZ(eG dd deZ)G dd dZ*G dd deZ+eG dd  d eZ,G d!d" d"eZ-G d#d$ d$eZ.dS )%    )annotationsN)ABCabstractmethod)Enum)
TYPE_CHECKINGAnyCallableOptionalOrderedDictProtocolTuple	TypeGuardUnionruntime_checkable)CombineOverlapArgs)DeepEPLLCombineInputDeepEPLLDispatchOutputDeepEPNormalCombineInputDeepEPNormalDispatchOutputFlashinferCombineInputFlashinferDispatchOutputStandardCombineInputStandardDispatchOutput)
TopKOutputc                   @  s"   e Zd ZdZd	ddZdd ZdS )
_RemovableDispatcherHandler   
hooks_dictr
   c                 C  s&   t j| _t  jd7  _t|| _d S )N   )r   next_ididweakrefrefweak_hooks_dictselfr    r$   _/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/layers/moe/token_dispatcher/base.py__init__+   s   z#_RemovableDispatcherHandle.__init__c                 C  s.   |   }|d ur| j|v r|| j= d S d S d S N)r!   r   r"   r$   r$   r%   remove0   s   z!_RemovableDispatcherHandle.removeN)r   r
   )__name__
__module____qualname__r   r&   r(   r$   r$   r$   r%   r   '   s    
r   c                   @  s(   e Zd Zdd ZdddZdd
dZdS )DispatcherBaseHooksc                 C  s   t ttf  | _d S r'   )r
   intr   	hook_dictr#   r$   r$   r%   r&   8   s   zDispatcherBaseHooks.__init__hook_funr   returnr   c                 C  s   t | j}|| j|j< |S r'   )r   r.   r   )r#   r0   handler$   r$   r%   register_hook;   s   
z!DispatcherBaseHooks.register_hookOptional[Any]c                 O  s   t d)Nz.This method should be overridden by subclasses)NotImplementedError)r#   argskwargsr$   r$   r%   __call__@   s   zDispatcherBaseHooks.__call__N)r0   r   r1   r   )r1   r4   )r)   r*   r+   r&   r3   r8   r$   r$   r$   r%   r,   6   s    
r,   c                   @  s   e Zd Zdd	d
ZdS )_PreDispatchHooks
dispatcherBaseDispatcherhidden_statestorch.Tensortopk_outputr   r1   )Optional[Tuple[torch.Tensor, TopKOutput]]c                 C  s4   | j  D ]}||||}|d ur|\}}q||fS r'   r.   values)r#   r:   r<   r>   r0   hook_outputr$   r$   r%   r8   F   s   z_PreDispatchHooks.__call__N)r:   r;   r<   r=   r>   r   r1   r?   r)   r*   r+   r8   r$   r$   r$   r%   r9   D       r9   c                   @     e Zd Zd
ddZd	S )_PostDispatchHooksr:   r;   dispatch_outputDispatchOutputr1   Optional[DispatchOutput]c                 C  *   | j  D ]}|||}|d ur|}q|S r'   r@   )r#   r:   rG   r0   rB   r$   r$   r%   r8   U      
z_PostDispatchHooks.__call__N)r:   r;   rG   rH   r1   rI   rC   r$   r$   r$   r%   rF   S   rD   rF   c                   @  rE   )_PreCombineHooksr:   r;   combine_inputCombineInputr1   Optional[CombineInput]c                 C  rJ   r'   r@   )r#   r:   rM   r0   rB   r$   r$   r%   r8   a   rK   z_PreCombineHooks.__call__N)r:   r;   rM   rN   r1   rO   rC   r$   r$   r$   r%   rL   _   rD   rL   c                   @  rE   )_PostCombineHooksr:   r;   r<   r=   r1   Optional[torch.Tensor]c                 C  rJ   r'   r@   )r#   r:   r<   r0   rB   r$   r$   r%   r8   m   rK   z_PostCombineHooks.__call__N)r:   r;   r<   r=   r1   rQ   rC   r$   r$   r$   r%   rP   k   rD   rP   c                   @  s`   e Zd ZedddZedddZedd
dZedddZedddZedddZ	dS )DispatchOutputCheckerrG   rH   r1   !TypeGuard[StandardDispatchOutput]c                 C  
   | j  S r'   formatis_standardrG   r$   r$   r%   format_is_standard|      
z(DispatchOutputChecker.format_is_standardc                 C  rT   r'   rU   rX   r$   r$   r%   format_is_triton_kernels   rZ   z.DispatchOutputChecker.format_is_triton_kernels%TypeGuard[DeepEPNormalDispatchOutput]c                 C  rT   r'   )rV   is_deepep_normalrX   r$   r$   r%   format_is_deepep_normal   rZ   z-DispatchOutputChecker.format_is_deepep_normal!TypeGuard[DeepEPLLDispatchOutput]c                 C  rT   r'   )rV   is_deepep_llrX   r$   r$   r%   format_is_deepep_ll   rZ   z)DispatchOutputChecker.format_is_deepep_llDTypeGuard[Union[DeepEPNormalDispatchOutput, DeepEPLLDispatchOutput]]c                 C  rT   r'   )rV   	is_deepeprX   r$   r$   r%   format_is_deepep   rZ   z&DispatchOutputChecker.format_is_deepep#TypeGuard[FlashinferDispatchOutput]c                 C  rT   r'   )rV   is_flashinferrX   r$   r$   r%   format_is_flashinfer   rZ   z*DispatchOutputChecker.format_is_flashinferN)rG   rH   r1   rS   )rG   rH   r1   r\   )rG   rH   r1   r_   )rG   rH   r1   rb   )rG   rH   r1   re   )
r)   r*   r+   staticmethodrY   r[   r^   ra   rd   rg   r$   r$   r$   r%   rR   z   s    rR   c                   @  sN   e Zd ZdZdZdZdZdddZdd	d
ZdddZ	dddZ
dddZdS )DispatchOutputFormatstandarddeepep_normal	deepep_ll
flashinferr1   boolc                 C  
   | t jkS r'   )ri   STANDARDr/   r$   r$   r%   rW         
z DispatchOutputFormat.is_standardc                 C  ro   r'   )ri   DEEPEP_NORMALr/   r$   r$   r%   r]      rq   z%DispatchOutputFormat.is_deepep_normalc                 C  ro   r'   )ri   	DEEPEP_LLr/   r$   r$   r%   r`      rq   z!DispatchOutputFormat.is_deepep_llc                 C  s   | t jt jfv S r'   )ri   rr   rs   r/   r$   r$   r%   rc      s   zDispatchOutputFormat.is_deepepc                 C  ro   r'   )ri   
FLASHINFERr/   r$   r$   r%   rf      rq   z"DispatchOutputFormat.is_flashinferN)r1   rn   )r)   r*   r+   rp   rr   rs   rt   rW   r]   r`   rc   rf   r$   r$   r$   r%   ri      s    



ri   c                   @  s(   e Zd ZU dZded< ed	ddZdS )
rH   z3Protocol for dispatch outputs in different formats.r=   r<   r1   ri   c                 C     d S r'   r$   r/   r$   r$   r%   rV         zDispatchOutput.formatN)r1   ri   )r)   r*   r+   __doc____annotations__propertyrV   r$   r$   r$   r%   rH      s
   
 rH   c                   @  sR   e Zd ZedddZeddd	ZedddZedddZedddZdS )CombineInputCheckerrM   rN   r1   TypeGuard[StandardCombineInput]c                 C     | j tjkS r'   )rV   CombineInputFormatrp   rM   r$   r$   r%   rY         z&CombineInputChecker.format_is_standard#TypeGuard[DeepEPNormalCombineInput]c                 C  r|   r'   )rV   r}   rr   r~   r$   r$   r%   r^      r   z+CombineInputChecker.format_is_deepep_normalTypeGuard[DeepEPLLCombineInput]c                 C  r|   r'   )rV   r}   rs   r~   r$   r$   r%   ra      r   z'CombineInputChecker.format_is_deepep_ll@TypeGuard[Union[DeepEPNormalCombineInput, DeepEPLLCombineInput]]c                 C  s   | j tjtjfv S r'   )rV   r}   rr   rs   r~   r$   r$   r%   rd      s   z$CombineInputChecker.format_is_deepep!TypeGuard[FlashinferCombineInput]c                 C  r|   r'   )rV   r}   rt   r~   r$   r$   r%   rg      r   z(CombineInputChecker.format_is_flashinferN)rM   rN   r1   r{   )rM   rN   r1   r   )rM   rN   r1   r   )rM   rN   r1   r   )rM   rN   r1   r   )	r)   r*   r+   rh   rY   r^   ra   rd   rg   r$   r$   r$   r%   rz      s    rz   c                   @  s   e Zd ZdZdZdZdZdS )r}   rj   rk   rl   rm   N)r)   r*   r+   rp   rr   rs   rt   r$   r$   r$   r%   r}      s
    r}   c                   @  s   e Zd ZdZedddZdS )rN   z1Protocol for combine inputs in different formats.r1   r}   c                 C  ru   r'   r$   r/   r$   r$   r%   rV      rv   zCombineInput.formatN)r1   r}   )r)   r*   r+   rw   ry   rV   r$   r$   r$   r%   rN      s    rN   c                   @  s   e Zd ZdZdS )BaseDispatcherConfigz"Base class for dispatcher configs.N)r)   r*   r+   rw   r$   r$   r$   r%   r      s    r   c                   @  s   e Zd ZdZdd Zed3d
dZd3ddZd4ddZed5ddZ	d5ddZ
d4ddZd6ddZd7dd Zd8d"d#Zd9d%d&Zd:d)d*Zd;d.d/Zd4d0d1Zd2S )<r;   zBase class for dispatchers.c                 C  s:   d | _ d | _d | _d | _d | _d | _d | _d | _d | _d S r'   )	quant_configoverlap_argsmeta_overlap_args_pre_dispatch_hooks_post_dispatch_hooks_pre_combine_hooks_post_combine_hooks_original_dispatch_func_original_combine_funcr/   r$   r$   r%   r&     s   
zBaseDispatcher.__init__r<   r=   r>   r   r1   rH   c                 C  ru   r'   r$   )r#   r<   r>   r$   r$   r%   dispatch  s   zBaseDispatcher.dispatchc                 C  sD   | j d ur|  | ||\}}| j||d}| jd ur | | |}|S )N)r<   r>   )r   r   r   )r#   r<   r>   rG   r$   r$   r%   _dispatch_with_hook  s   

z"BaseDispatcher._dispatch_with_hookNonec                 C  "   | j d u r| j| _ | j| _d S d S r'   )r   r   r   r/   r$   r$   r%   _override_dispatch_func+     
z&BaseDispatcher._override_dispatch_funcrM   rN   c                 C  ru   r'   r$   )r#   rM   r$   r$   r%   combine0  s   zBaseDispatcher.combinec                 C  s<   | j d ur|  | |}| j|d}| jd ur| | |}|S )Nr~   )r   r   r   )r#   rM   r<   r$   r$   r%   _combine_with_hook4  s   

z!BaseDispatcher._combine_with_hookc                 C  r   r'   )r   r   r   r/   r$   r$   r%   _override_combine_func<  r   z%BaseDispatcher._override_combine_funchook_Callable[[BaseDispatcher, torch.Tensor, TopKOutput], Optional[Tuple[torch.Tensor, TopKOutput]]]r   c                 C  *   | j d u rt | _ |   | j |}|S r'   )r   r9   r   r3   r#   r   r2   r$   r$   r%   register_pre_dispatch_hookA  s
   
z)BaseDispatcher.register_pre_dispatch_hookDCallable[[BaseDispatcher, DispatchOutput], Optional[DispatchOutput]]c                 C  r   r'   )r   rF   r   r3   r   r$   r$   r%   register_post_dispatch_hookN  
   
z*BaseDispatcher.register_post_dispatch_hook@Callable[[BaseDispatcher, CombineInput], Optional[CombineInput]]c                 C  r   r'   )r   rL   r   r3   r   r$   r$   r%   register_pre_combine_hookW  r   z(BaseDispatcher.register_pre_combine_hook@Callable[[BaseDispatcher, torch.Tensor], Optional[torch.Tensor]]c                 C  r   r'   )r   rP   r   r3   r   r$   r$   r%   register_post_combine_hook`  r   z)BaseDispatcher.register_post_combine_hookr   dictc                 C  s
   || _ d S r'   )r   )r#   r   r$   r$   r%   set_quant_configi  rq   zBaseDispatcher.set_quant_configcombine_overlap_argsr   r   c                 C  s   || _ || _d S r'   r   r   )r#   r   r   r$   r$   r%   set_overlap_argsl  s   
zBaseDispatcher.set_overlap_argsc                 C  s   d | _ d | _d S r'   r   r/   r$   r$   r%   clear_overlap_argsr  s   
z!BaseDispatcher.clear_overlap_argsN)r<   r=   r>   r   r1   rH   )r1   r   )rM   rN   r1   r=   )r   r   r1   r   )r   r   r1   r   )r   r   r1   r   )r   r   r1   r   )r   r   r1   r   )r   r   r   r   r1   r   )r)   r*   r+   rw   r&   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r$   r$   r$   r%   r;     s$    






	
	
	
r;   )/
__future__r   r   abcr   r   enumr   typingr   r   r   r	   r
   r   r   r   r   r   torch-sglang.srt.batch_overlap.single_batch_overlapr   &sglang.srt.layers.moe.token_dispatcherr   r   r   r   r   r   r   r   sglang.srt.layers.moe.topkr   r   r,   r9   rF   rL   rP   rR   ri   rH   rz   r}   rN   r   r;   r$   r$   r$   r%   <module>   s4    0(
'#