o
    پi                  
   @   s   d Z ddlZddlmZ ddlmZmZ ddlm	Z	 ddl
mZmZ ddlmZ dejjd	ed
efddZdejjdejdede	d
ejf
ddZdS )z
Torch-native implementation for FusedMoE. This is used for torch.compile.
It is based on https://github.com/pytorch-labs/gpt-fast/blob/32971d3129541c5bfb4f715abc33d1c5f408d204/mixtral-moe/model.py#L204
    N)
functional)
GeluAndMul
SiluAndMul)MoeRunnerConfig)StandardCombineInputStandardDispatchOutput)StandardTopKOutputlayerdispatch_outputreturnc                 C   s   |\}}}| j }|jrt |\}}}| j| }	tj|	ddd\}
}| j| }td||
}|jdkr9t	
|}n|jdkrDt	|}ntd|jtd||}td|| |}td	|||j}t|d
S )N   dimzti,taoi -> taosilugelu5Unsupported activation: moe_runner_config.activation=zti, taoi -> taoztao, taio -> taiztai,ta -> ti)hidden_states)moe_runner_configapply_router_weight_on_inputNotImplementedError
w13_weighttorchchunk	w2_weighteinsum
activationFr   r   
ValueErrortodtyper   )r	   r
   xx_scaletopk_outputr   topk_weightstopk_ids_w13_weights
w1_weights
w3_weights
w2_weightsx1x3expert_outs r-   Z/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/layers/moe/fused_moe_native.pyfused_moe_forward_native   s(   






r/   r    r"   r   c                 C   s  |j rt |\}}}| j}||jd |f}|d|tjd |j	dd}	|
d }
||
|jd   }|	  }	|jdkrHt }n|jdkrQt }ntd|jg }d}t|	D ]4\}}|| }|dkrnqa||| }| j| }| j| }t||}||}t||}|| |}qat|rtj|ddn|d}t|}|||
< |j
g |jdR  |j|jddj	dd|j}|S )Nr      r   r   r   r   ) r   r   num_experts	new_zerosshapescatter_r   r   int64sumviewargsortcpunumpyr   r   r   r   	enumerater   r   r   linearappendlencat	new_empty
empty_liketyper   mul_	unsqueeze)r	   r    r"   r   r#   r$   r%   len_expertscntstokens_per_expertidxssorted_tokensactoutputs	start_idxi
num_tokensend_idxtokens_for_this_expertlayer_w13_weightlayer_w2_weightgate_up
expert_outoutsnew_x	final_outr-   r-   r.   moe_forward_native1   sP   





 
rY   )__doc__r   torch.nnr   r   sglang.srt.layers.activationr   r    sglang.srt.layers.moe.moe_runnerr   &sglang.srt.layers.moe.token_dispatcherr   r   sglang.srt.layers.moe.topkr   nnModuler/   TensorrY   r-   r-   r-   r.   <module>   s2    
