o
    پi5.                     @   s   d dl Z d dlZd dlmZ d dlmZmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZmZ d dlmZ G d	d
 d
eZedkrI	 e   dS dS )    N)init_distributed_environment)get_tp_groupinitialize_model_parallel)set_dp_buffer_len)FlashinferDispatcher)initialize_moe_config)
ServerArgs$set_global_server_args_for_scheduler)CustomTestCasec                   @   sH   e Zd Zedd Zedd Z	dd	d
Zdd Zdd Zdd Z	dS )TestFlashinferDispatcherc                 C   s   t dd}d|_d|_t| t| tddddd tj }tj	 }t
d|tj   }tj| t||d	 d S )
Ndummy)
model_pathflashinfer_cutlass
flashinfernccl)
world_sizerank
local_rankbackendzcuda:)tensor_model_parallel_sizeexpert_model_parallel_size)r   moe_runner_backendmoe_a2a_backendr	   r   r   torchdistributedget_world_sizeget_rankdevicecudadevice_count
set_devicer   )clsserver_argsr   r   r    r$   Z/home/ubuntu/.local/lib/python3.10/site-packages/sglang/test/test_flashinfer_dispatcher.py
setUpClass   s$   



z#TestFlashinferDispatcher.setUpClassc                 C   s   t j rt j  d S d S )N)r   r   is_initializeddestroy_process_group)r"   r$   r$   r%   tearDownClass)   s   
z&TestFlashinferDispatcher.tearDownClass            c                 C   s   t t j||||tjdS )z$Helper to create dispatcher instance)grouprouter_topknum_expertsnum_local_expertshidden_sizeparams_dtype)r   r   device_groupr   bfloat16)selfr/   r0   r1   r2   r$   r$   r%   create_dispatcher/   s   z*TestFlashinferDispatcher.create_dispatcherc                 C   s  d}d}d}t j }t j }|}d}t|| |ddd t j||fd| t jdd	}|d | }	|	}
t j||f|
t jdd	}t j||ft j	dd	}d
dl
m} |||dd}t j  | j||||d}|ddi |||}|j}| |jd |d | | }| |jd
 || d||  d | t ||| |d |  d| k | t |d||  dk | t ||d | d dk dS )z!Test basic dispatch functionality   r-      TNglobal_dp_buffer_lenlocal_dp_buffer_lendp_max_paddingglobal_num_tokens      Y@r   dtyper   r   StandardTopKOutputtopk_weightstopk_idsrouter_logitsr/   r0   r1   r2   input_global_scaleShould receive  tokens        )r   r   r   r   r   fullr5   int32onesfloat32sglang.srt.layers.moe.topkrC   barrierr7   set_quant_configdispatchhidden_statesassertEqualhidden_states_scaleshape
assertTrueall)r6   
num_tokensr2   r/   r   r   r0   r1   rU   target_ranktarget_expertrF   rE   rC   topk_output
dispatcherdispatch_outputreceived_hidden_statesexpected_source_rankr$   r$   r%   test_dispatch_basic<   s   



z,TestFlashinferDispatcher.test_dispatch_basicc                 C   s  d}d}d}t j }t j }|}d}t|| |dg dd |d | }|}	|dkrKt jd|t jdd}
t jd|t jdd}t jd|t jdd}n%t j	||fd	| t jdd}
t j	||f|	t jdd}t j
||ft jdd}dd
lm} |||dd}| j||||d}|ddi ||
|}|j}|d | | }| |jd || d||  d |dkr| t |dkd dS | t ||| |d |  d	| kd | t |d||  dkd | t ||d | d dkd dS )z2Test dispatch when there are no tokens (edge case)r8   r9   F)r8   r   r8   r8   r:   r   r   r@   r?   rB   NrD   rH   rI   rJ   rK   r*   rL   zRank should receive no tokenszQRank {rank} should receive tokens from the expected source {expected_source_rank}z1Rank should receive no tokens from previous ranksz-Rank should receive no tokens from next ranks)r   r   r   r   r   emptyr5   rN   rP   rM   rO   rQ   rC   r7   rS   rT   rU   rV   rX   rY   rZ   )r6   r[   r2   r/   r   r   r0   r1   r\   r]   rU   rF   rE   rC   r^   r_   r`   ra   rb   r$   r$   r%   test_dispatch_with_empty_tokens   s   



z8TestFlashinferDispatcher.test_dispatch_with_empty_tokensc                 C   sX  d}d}d}t j }t j }|}d}t|| |ddd t j||ft jdd}|d | }	|	}
t j||f|
t jdd}t j	||ft j
dd}dd	lm} |||dd
}| j||||d}t jdt j
dd}|d|i |||}| |jj|| |d f | |jjt j | |jd | |j || |d   | |jjt j dS )z+Test dispatch with FP4 quantization enabledr-   r9   TNr:   r   r@   r   rB   rD   rH   g      ?rI   r*   r8   )r   r   r   r   r   randnr5   rM   rN   rO   rP   rQ   rC   r7   tensorrS   rT   rV   rU   rX   rA   uint8assertNotEqualrW   numel)r6   r[   r2   r/   r   r   r0   r1   rU   r\   r]   rF   rE   rC   r^   r_   rI   r`   r$   r$   r%   #test_dispatch_with_fp4_quantization   s`   

z<TestFlashinferDispatcher.test_dispatch_with_fp4_quantizationN)r*   r+   r,   r-   )
__name__
__module____qualname__classmethodr&   r)   r7   rc   re   rk   r$   r$   r$   r%   r      s    


Tlr   __main__)unittestr   sglang.srt.distributedr   %sglang.srt.distributed.parallel_stater   r   sglang.srt.layers.dp_attentionr   1sglang.srt.layers.moe.token_dispatcher.flashinferr   sglang.srt.layers.moe.utilsr   sglang.srt.server_argsr   r	   sglang.test.test_utilsr
   r   rl   mainr$   r$   r$   r%   <module>   s       .