o
    پi>#                     @  s(  d dl mZ d dlmZmZ d dlmZ d dlmZm	Z	m
Z
mZmZ d dlZd dlmZmZmZ erHd dlmZmZmZ d dlmZmZmZmZ eG d	d
 d
ZeG dd deZG dd deZeG dd deZG dd deZG dd dZ G dd dZ!d$ddZ"d%dd Z#d&d"d#Z$dS )'    )annotations)ABCabstractmethod)	dataclass)TYPE_CHECKINGCallableOptionalTuple	TypeGuardN)MoeA2ABackendMoeRunnerBackendRoutingMethodType)TritonRunnerCoreTritonRunnerInputTritonRunnerOutput)CombineInputCombineInputFormatDispatchOutputDispatchOutputFormatc                   @  s   e Zd ZU dZded< dZded< dZded< dZded< dZded< dZ	ded< dZ
ded	< dZd
ed< dZded< dZded< dZded< dZded< dZded< dZded< dZded< dZded< dZded< dS )MoeRunnerConfigNzOptional[int]num_expertsnum_local_expertshidden_sizeintermediate_size_per_partitionlayer_idtop_knum_fused_shared_expertszOptional[torch.dtype]params_dtypezOptional[RoutingMethodType]routing_method_typesilustr
activationTboolis_gatedFapply_router_weight_on_inputinplace
no_combinezOptional[float]routed_scaling_factorgemm1_alphagemm1_clamp_limit)__name__
__module____qualname__r   __annotations__r   r   r   r   r   r   r   r   r!   r#   r$   r%   r&   r'   r(   r)    r.   r.   Y/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/layers/moe/moe_runner/base.pyr      s$   
 r   c                   @  (   e Zd Zeed	ddZd
ddZdS )RunnerInputreturnr   c                 C     d S Nr.   selfr.   r.   r/   runner_backend7      zRunnerInput.runner_backendTypeGuard[TritonRunnerInput]c                 C     | j tjkS r4   r7   r   TRITONr5   r.   r.   r/   runner_backend_is_triton;      z$RunnerInput.runner_backend_is_tritonNr2   r   )r2   r9   r*   r+   r,   propertyr   r7   r=   r.   r.   r.   r/   r1   5   s
    r1   c                   @  r0   )RunnerOutputr2   r   c                 C  r3   r4   r.   r5   r.   r.   r/   r7   @   r8   zRunnerOutput.runner_backendTypeGuard[TritonRunnerOutput]c                 C  r:   r4   r;   r5   r.   r.   r/   r=   D   r>   z%RunnerOutput.runner_backend_is_tritonNr?   )r2   rC   r@   r.   r.   r.   r/   rB   ?   s
    rB   c                   @  s   e Zd ZdZdS )MoeQuantInfozMoe quantization data.N)r*   r+   r,   __doc__r.   r.   r.   r/   rD   H   s    rD   c                   @  s@   e Zd ZdddZedddZeedddZdddZdS )MoeRunnerCoreconfigr   c                 C  s
   || _ d S r4   )rG   )r6   rG   r.   r.   r/   __init__P   s   
zMoeRunnerCore.__init__runner_inputr1   
quant_inforD   running_statedictr2   rB   c                 C  r3   r4   r.   )r6   rI   rJ   rK   r.   r.   r/   runS   s   zMoeRunnerCore.runr   c                 C  r3   r4   r.   r5   r.   r.   r/   r7   Y   r8   zMoeRunnerCore.runner_backendTypeGuard[TritonRunnerCore]c                 C  r:   r4   r;   r5   r.   r.   r/   r=   ]   r>   z&MoeRunnerCore.runner_backend_is_tritonN)rG   r   )rI   r1   rJ   rD   rK   rL   r2   rB   r?   )r2   rN   )	r*   r+   r,   rH   r   rM   rA   r7   r=   r.   r.   r.   r/   rF   O   s    
rF   c                   @  s6   e Zd ZU i Zded< eddd	ZedddZdS )FusedOpPoolzdict[str, Callable]_fused_funcsa2a_backend_namer    runner_backend_name
fused_funcr   c                 C  sb   ||f}|| j v rtd| d| dt|sJ d| t|s*J d| || j |< d S )NzFused function for  to  is already registered.zInvalid dispatch name: zInvalid runner name: )rP   
ValueErrorr   r   )clsrQ   rR   rS   keyr.   r.   r/   register_fused_funcd   s    
zFusedOpPool.register_fused_funcdispatch_namerunner_namer2   Optional[Callable]c                 C  s   ||f}| j |}|S r4   )rP   get)rW   rZ   r[   rX   rS   r.   r.   r/   get_fused_funcu   s   zFusedOpPool.get_fused_funcN)rQ   r    rR   r    rS   r   )rZ   r    r[   r    r2   r\   )r*   r+   r,   rP   r-   classmethodrY   r^   r.   r.   r.   r/   rO   a   s   
 rO   c                   @  s^   e Zd ZU i Zded< i Zded< edd
dZedddZedddZ	edddZ
dS ) PermuteMethodPoolz=dict[Tuple[DispatchOutputFormat, MoeRunnerBackend], Callable]_pre_permute_methodsz;dict[Tuple[MoeRunnerBackend, CombineInputFormat], Callable]_post_permute_methodsdispatch_output_namer    rR   permute_funcr   c                 C  6   ||f}|| j v rtd| d| d|| j |< dS )a8  
        Register a customized pre-permute function for the given DispatchOutputFormat and MoeRunnerBackend.

        :param dispatch_output_name: The DispatchOutputFormat name.
        :param runner_backend_name: The MoeRunnerBackend name.
        :param permute_func: The permute function to register.
        zPre-permute method for rT   rU   N)ra   rV   )rW   rc   rR   rd   rX   r.   r.   r/   register_pre_permute      
z&PermuteMethodPool.register_pre_permutecombine_input_namec                 C  re   )a3  
        Register a customized post-permute function for the given MoeRunnerBackend and CombineInputFormat.

        :param runner_backend_name: The MoeRunnerBackend name.
        :param combine_input_name: The CombineInputFormat name.
        :param permute_func: The permute function to register.
        zPost-permute method for rT   rU   N)rb   rV   )rW   rR   rh   rd   rX   r.   r.   r/   register_post_permute   rg   z'PermuteMethodPool.register_post_permutedispatch_output_formatr   runner_input_formatr   r2   c                 C  6   ||f}| j |}|dusJ d| d| d|S )a9  
        Retrieve the pre-permute function for the given DispatchOutputFormat and MoeRunnerBackend.

        :param dispatch_output_format: The DispatchOutputFormat type.
        :param runner_input_format: The MoeRunnerBackend type.
        :return: The registered permute function or None if not found.
        NzPre-permute function for rT    is not registered)ra   r]   )rW   rj   rk   rX   pre_permute_funcr.   r.   r/   get_pre_permute      
z!PermuteMethodPool.get_pre_permuterunner_output_formatcombine_input_formatr   c                 C  rl   )a5  
        Retrieve the post-permute function for the given MoeRunnerBackend and CombineInputFormat.

        :param runner_output_format: The MoeRunnerBackend type.
        :param combine_input_format: The CombineInputFormat type.
        :return: The registered permute function or None if not found.
        NzPost-permute function for rT   rm   )rb   r]   )rW   rq   rr   rX   post_permute_funcr.   r.   r/   get_post_permute   rp   z"PermuteMethodPool.get_post_permuteN)rc   r    rR   r    rd   r   )rR   r    rh   r    rd   r   )rj   r   rk   r   r2   r   )rq   r   rr   r   r2   r   )r*   r+   r,   ra   r-   rb   r_   rf   ri   ro   rt   r.   r.   r.   r/   r`   |   s   
 

r`   rQ   r    rR   r2   r   c                   s   d fdd}|S )z
    Decorator to register a fused function for the given DispatchOutputFormat and MoeRunnerBackend.

    :param a2a_backend_name: The A2A backend name.
    :param runner_backend_name: The MoeRunnerBackend name.
    :return: The decorator function.
    rS   r   c                      t  |  | S r4   )rO   rY   )rS   rQ   rR   r.   r/   	decorator   s   z&register_fused_func.<locals>.decoratorN)rS   r   r.   )rQ   rR   rw   r.   rv   r/   rY      s   rY   rc   c                      d fdd}|S )	a  
    Decorator to register a pre-permute function for the given DispatchOutputFormat and MoeRunnerBackend.

    :param dispatch_output_name: The DispatchOutputFormat name.
    :param runner_backend_name: The MoeRunnerBackend name.
    :return: The decorator function.
    rd   LCallable[[DispatchOutput, MoeQuantInfo, MoeRunnerConfig, dict], RunnerInput]r2   r   c                   ru   r4   )r`   rf   rd   rc   rR   r.   r/   rw         z'register_pre_permute.<locals>.decoratorN)rd   ry   r2   r   r.   )rc   rR   rw   r.   r{   r/   rf         
rf   rh   c                   rx   )	a  
    Decorator to register a post-permute function for the given MoeRunnerBackend and CombineInputFormat.

    :param runner_backend_name: The MoeRunnerBackend name.
    :param combine_input_name: The CombineInputFormat name.
    :return: The decorator function.
    rd   KCallable[[RunnerOutput, MoeQuantInfo, MoeRunnerConfig, dict], CombineInput]r2   r   c                   s   t  |  | S r4   )r`   ri   rz   rh   rR   r.   r/   rw     r|   z(register_post_permute.<locals>.decoratorN)rd   r~   r2   r   r.   )rR   rh   rw   r.   r   r/   ri     r}   ri   )rQ   r    rR   r    r2   r   )rc   r    rR   r    r2   r   )rR   r    rh   r    r2   r   )%
__future__r   abcr   r   dataclassesr   typingr   r   r   r	   r
   torchsglang.srt.layers.moe.utilsr   r   r   'sglang.srt.layers.moe.moe_runner.tritonr   r   r   &sglang.srt.layers.moe.token_dispatcherr   r   r   r   r   r1   rB   rD   rF   rO   r`   rY   rf   ri   r.   r.   r.   r/   <module>   s,    		
]
