o
    پi                     @   s   d dl Z d dlZd dlmZ d dlmZ d dlmZm	Z	 d dl
mZmZ g dZdd Ze jd	ee jd
g de jdg de dededed
edef
ddZedkrfeddddd dS dS )    N)cutlass_fused_moe)
SiluAndMul)
TopKConfigselect_experts)
ServerArgs$set_global_server_args_for_scheduler)
)      r	   )r   r	      )r      r	   )r   r   r
   )@   r	   r	   )r   r	   r
   )r   r   r	   )r   i   r	   )   r	   r	   )r   r	   r
   c                 C   st  | j \}}ttdd d}||}|d dks#J d| d| |d }	|j|	|d\}
}tj||
g|d }| |d	|	d
|d

d	|} tj|| |j d
 | j| jd}tj|d	tjd}t||\}}|d	}|d	}t|j d D ]#}||k}| rt | | || dd
 || dd
 ||< q}||d	|j d
 ||d	d
|j jd
dS )Ndummy)
model_pathr   r   zExpected even size in dim z, got )dim   )dtypedevice)r   r   )shaper   r   sizesplittorchcat
contiguousviewrepeatreshapezerosr   r   softmaxfloat32topkrangesumr   	transposeto)aw13w2scorer"   BDr   r   halfw1w3outtopk_weighttopk_idsimask r5   W/home/ubuntu/.local/lib/python3.10/site-packages/sglang/test/test_cutlass_w16a16_moe.pytorch_moe_reference   s4   

   

(r7   zm,n,ke)(   r      r"   )r         mnkc              	   C   s   t d t j}t j| |fd|dd }t j|d| |fd|dd }t j|||fd|dd }t j| |fd|d}	t||	t|ddd}
|
\}}}t||||||d	d
d }t||||	|}t jj	||ddd d	S )z
    Test the bf16 cutlass moe API.

    Args:
        m: number of tokens
        n: intermediate size
        k: hidden size
        e: number of experts
        topk: top-k experts per token
       cuda)r   r   
   r   F)top_krenormalize)hidden_statesrouter_logitstopk_configN)inputtoken_selected_expertstoken_final_scalesfc1_expert_weightsfc2_expert_weightsoutput_dtypequant_scalesr   g{Gz?)rtolatol)
r   manual_seedbfloat16randnr   r   flashinfer_cutlass_fused_moer7   testingassert_close)r=   r>   r?   r8   r"   r   r'   r(   r)   r*   topk_outputtopk_weightsr2   _test_outputtorch_outputr5   r5   r6    test_flashinfer_bf16_cutlass_moe:   s2   


r\   __main__r   r	   r<   r   )pytestr   flashinfer.fused_moer   rT   sglang.srt.layers.activationr   sglang.srt.layers.moe.topkr   r   sglang.srt.server_argsr   r   MNK_FACTORSr7   markparametrizeinference_modeintr\   __name__r5   r5   r5   r6   <module>   s    !&6