o
    پi'                     @   s   d dl mZ d dlmZmZ d dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ eG dd	 d	Zd
efddZdejjdedefddZdd Zdd ZdejjdedefddZdd Zdd ZdejjdedefddZdd Zd d! ZdS )"    )	dataclass)ListOptionalN
operations)	Operation)DeepEPConfig)ForwardModec                   @   sr   e Zd ZU ee ed< dZee ed< dZ	ee ed< e
ded  dd fddZed	ejjd
edd fddZdS )OperationsStrategyr   Ndeep_gemm_num_smstbo_delta_stagesitemsreturnc                 C   s4   t dd |D tdd |D tdd |D dS )Nc                 S   s   g | ]
}|j D ]}|qqS  r   ).0itemxr   r   `/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/batch_overlap/operations_strategy.py
<listcomp>   s    z-OperationsStrategy.concat.<locals>.<listcomp>c                 S      g | ]}|j qS r   )r   r   r   r   r   r   r          c                 S   r   r   )r   r   r   r   r   r      r   )r   r   r   )r
   _assert_all_same)clsr   r   r   r   concat   s   zOperationsStrategy.concatlayersforward_modec                    sp   | d j j}|dkrt fdd| D S |dkr&t fdd| D S |dkr6t fdd| D S t)	Nr   DeepseekV2DecoderLayerc                       g | ]}t | qS r   )3_compute_moe_deepseek_layer_operations_strategy_tbor   layerr   r   r   r   &       z3OperationsStrategy.init_new_tbo.<locals>.<listcomp>Qwen3MoeDecoderLayerc                    r   r   )0_compute_moe_qwen3_layer_operations_strategy_tbor    r"   r   r   r   /   r#   MiMoV2DecoderLayerc                    r   r   )1_compute_moe_mimov2_layer_operations_strategy_tbor    r"   r   r   r   8   r#   )	__class____name__r
   r   NotImplementedError)r   r   
layer_namer   r"   r   init_new_tbo   s(   


	zOperationsStrategy.init_new_tbo)r)   
__module____qualname__r   r   __annotations__r   r   intr   classmethodr   staticmethodtorchnn
ModuleListr	   r,   r   r   r   r   r
      s   
 r
   r   c                    s"   t  fdd D sJ  d S )Nc                 3   s    | ]	}| d  kV  qdS )r   Nr   r   r   r   r   	<genexpr>D   s    z#_assert_all_same.<locals>.<genexpr>r   )allr6   r   r6   r   r   C   s   r   r!   r   r   c                 C   J   | j sJ d|tjkrt| S |tjks|tjkrt| S td|)Nz#dense layer TBO not yet implementedUnsupported forward_mode=)is_layer_sparser	   EXTEND"_compute_moe_deepseek_blog_prefillDECODETARGET_VERIFY!_compute_moe_deepseek_blog_decoder*   r!   r   r   r   r   r   L      
r   c                 C   s   t jjdd}|j}|t j }t|d| j| j	j
| j	j| j| jj| jj| jjt | jj| jj| jjt | jj| jj| jj| jgdS Ncuda)devicer   r   r   r   )r3   rD   get_device_propertiesmulti_processor_countr   get_instancenum_smsr
   op_comm_prepare_attn	self_attn
op_prepareop_coreop_comm_prepare_mlpmlpop_gateop_select_expertsop_dispatch_ar   YieldOperationop_dispatch_b
op_expertsop_combine_aop_shared_expertsop_combine_b	op_outputop_comm_postprocess_layerr!   device_propertiestotal_num_smsr   r   r   r   r=   [   s0   r=   c                 C   sz   t d d| j| jjt | jj| j| jj	| jj
t | jj| jjt | jj| jj| jjt | jjt | jj| jgdS N   rF   )r
   rK   rL   rM   r   rT   rN   rO   rP   rQ   rR   rS   rX   rU   rV   rW   rY   rZ   r[   r!   r   r   r   r@   x   s0   r@   c                 C   r9   )Nz$qwen3 moe only support sparse layersr:   )r;   r	   r<   _compute_moe_qwen3_prefillr>   r?   _compute_moe_qwen3_decoder*   rA   r   r   r   r%      rB   r%   c                 C      t jjdd}|j}|t j }t|d| j| j	j
| j	j| j| jj| jj| jjt | jj| jj| jjt | jj| jj| jgdS rC   r3   rD   rG   rH   r   rI   rJ   r
   rK   rL   rM   rN   rO   rP   rQ   rR   rS   r   rT   rU   rV   rW   rY   rZ   r[   r\   r   r   r   rb      .   rb   c                 C   t   t d d| j| jjt | jj| j| jj	| jj
t | jjt | jj| jj| jjt | jj| jj| jt gdS r_   r
   rK   rL   rM   r   rT   rN   rO   rP   rQ   rR   rS   rU   rV   rW   rY   rZ   r[   ra   r   r   r   rc      .   rc   c                 C   r9   )Nz1MiMoV2DecoderLayer moe only support sparse layersr:   )r;   r	   r<   _compute_moe_mimov2_prefillr>   r?   _compute_moe_mimov2_decoder*   rA   r   r   r   r'      rB   r'   c                 C   rd   rC   re   r\   r   r   r   rj      rf   rj   c                 C   rg   r_   rh   ra   r   r   r   rk     ri   rk   )dataclassesr   typingr   r   r3   sglang.srt.batch_overlapr   #sglang.srt.batch_overlap.operationsr   &sglang.srt.layers.moe.token_dispatcherr   ,sglang.srt.model_executor.forward_batch_infor	   r
   r   r4   Moduler   r=   r@   r%   rb   rc   r'   rj   rk   r   r   r   r   <module>   sJ    6	
!
 
