o
    پi                     @   s   d dl mZ d dlZdd Z	ddejdejdedeej d	ejf
d
dZ	ddejdejdejdejdedeej d	ejfddZ	ddejdejdejdedeej d	ejfddZdS )    )OptionalNc                 C   s(   |dkrt j| |ddS t j| ||dS )N   T)dimkeepdim)r   )torchmaxtopk)valuesr   r    r
   D/home/ubuntu/.local/lib/python3.10/site-packages/sgl_kernel/top_k.py	fast_topk   s   r   scorelengthsr   
row_startsreturnc                 C   sR   |dksJ d|   dksJ | j| d|ftjd}tjj| ||| |S )a  
    Get the topk indices of the score tensor.
    Args:
        score: The score tensor of shape (B, L). The score tensor is the logits
            between the query and the key whose layout is either ragged or paged.
            row_starts is only required when the key is ragged.
        lengths: The lengths tensor of shape (B)
        topk: The number of topk indices to get
        row_starts: The start index of each row in the score tensor of shape (B).
            For each row i, topk only applies to section [row_starts[i], row_starts[i] + lengths[i]]
            of the score tensor.
    Returns:
        The topk indices tensor of shape (B, topk)
       zGfast_topk_v2 is only optimized for deepseek v3.2 model, where topk=2048   r   dtype)r   	new_emptysizer   int32ops
sgl_kernelr   )r   r   r   r   topk_indicesr
   r
   r   fast_topk_v2   s   
r   page_table_size_1cu_seqlens_qc                 C   sZ   |dksJ d|   dksJ |}| j| jd |ftjd}tjj| ||||| |S )a  
    Get the topk indices of the score tensor and then transform the topk indices
    to indices to the page table (page_size = 1)
    Args:
        score: The score tensor of shape (B, L). The score tensor is the logits
            between the query and the key whose layout is either ragged or paged.
            row_starts is only required when the key is ragged.
        lengths: The lengths tensor of shape (B)
        page_table_size_1: The page table tensor of shape (Batch, topk)
        cu_seqlens_q: The cumulative sequence lengths tensor of shape (Batch + 1)
        topk: The number of topk indices to get
        row_starts: The start index of each row in the score tensor of shape (B).
            For each row i, topk only applies to section [row_starts[i], row_starts[i] + lengths[i]]
            of the score tensor. It's only used for cases where the key is
            ragged, i.e. during extend and draft extend.
    Returns:
        The topk indices tensor of shape (B, topk)
    r   zTfast_topk_transform_fused is only optimized for deepseek v3.2 model, where topk=2048r   r   r   )r   r   shaper   r   r   r   fast_topk_transform_fused)r   r   r   r   r   r   src_page_tabledst_page_tabler
   r
   r   r   -   s   
r   topk_indices_offsetc                 C   sT   |dksJ d|   dksJ | j| jd |ftjd}tjj| |||| |S )a"  
    Get the topk indices of the score tensor and then transform the topk indices to
    indices to ragged kv (non-paged). This function is only used for extend,
    not including draft extend.
    Args:
        score: The score tensor of shape (B, L). The score tensor is the logits
            between the query and the key which can be ragged or paged.
            row_starts is only required when the key is ragged.
        lengths: The lengths tensor of shape (B)
        topk_indices_offset: The offset of topk indices in ragged kv of shape (B)
        topk: The number of topk indices to get
        row_starts: The start index of each row in the score tensor of shape (B).
            For each row i, topk only applies to section [row_starts[i], row_starts[i] + lengths[i]]
            of the score tensor. It can be None if only the fast path is triggered,
            in the case of all values in lengths <= topk (not checked in the kernel,
            guaranteed by the caller).
    Returns:
        The topk indices tensor of shape (B, topk)
    r   z[fast_topk_transform_ragged_fused is only optimized for deepseek v3.2 model, where topk=2048r   r   r   )r   r   r   r   r   r   r    fast_topk_transform_ragged_fused)r   r   r"   r   r   topk_indices_raggedr
   r
   r   r#   S   s   

r#   )N)	typingr   r   r   Tensorintr   r   r#   r
   r
   r
   r   <module>   sZ    
#
+