o
    پi                     @   s   d dl Z d dlmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
 ddgZ	dd	e jd
e jde
fddZdedededefddZdS )    N)fused_qk_rope_cat_and_cache_mla)fused_qk_rope_cat)gemm_a16w16)gemm_a16w16_atomic)BumpAllocatorr   r   hidden_statesweightgemm_output_zero_allocatorc                 C   s   | j d }|j d }d }|dkr,|d kr ||| ||}ntj||ftj| jd}|d ur=t| ||d| j	}|S t
| |}|S )Nr      )dtypedevice)y)shapeallocateviewtorchzerosfloat32r   r   tor   r   )r   r   r	   MNr   logits r   W/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/layers/rocm_linear_utils.pyaiter_dsv3_router_gemm   s   


r   n_routed_expertsnum_moe_layersallocate_sizeembedding_dimc                 C   s(   |dks| dkr
dS d||   }|| S )Ni   r
   r   r   )r   r   r   r   per_layer_sizer   r   r   (get_dsv3_gemm_output_zero_allocator_size%   s   r    )N)r   aiter.ops.triton.fused_kv_cacher    aiter.ops.triton.fused_qk_concatr   aiter.ops.triton.gemm_a16w16r   #aiter.ops.triton.gemm_a16w16_atomicr   sglang.srt.utilsr   __all__Tensorr   intr    r   r   r   r   <module>   s0    
