o
    پi                     @  st   d dl mZ d dlmZ d dlZd dlmZmZ er d dlm	Z	 eddd	Z
edd
dZdddZdddZdS )    )annotations)TYPE_CHECKINGN)
cache_onceload_jit)Modulereturnr   c                   C     t ddgdgdS )Nconcat_mla_kelementwise/concat_mla.cuh)r	   zConcatMlaKKernel::run
cuda_filescuda_wrappersr    r   r   P/home/ubuntu/.local/lib/python3.10/site-packages/sglang/jit_kernel/concat_mla.py_jit_concat_mla_k_module   
   r   c                   C  r   )Nconcat_mla_absorb_qr
   )r   zConcatMlaAbsorbQKernel::runr   r   r   r   r   r   _jit_concat_mla_absorb_q_module   r   r   ktorch.Tensork_nopek_ropeNonec                 C  s   t  }|| || dS )a  
    Concatenate k_nope and k_rope into k for MLA (Multi-head Latent Attention).

    This kernel efficiently broadcasts k_rope across all heads while copying
    k_nope values directly.

    Args:
        k: Output tensor of shape [num_tokens, num_heads=128, k_head_dim=192], dtype=bfloat16
        k_nope: Input tensor of shape [num_tokens, num_heads=128, nope_head_dim=128], dtype=bfloat16
        k_rope: Input tensor of shape [num_tokens, 1, rope_head_dim=64], dtype=bfloat16
    N)r   r	   )r   r   r   moduler   r   r   r	      s   r	   abc                 C  sP   t jg | jdd | jd |jd  R | j| jd}t }|| || |S )aM  
    Concatenate tensors a and b for MLA absorbed Q computation.

    Args:
        a: Input tensor of shape [dim_0, dim_1, a_last_dim], dtype=bfloat16
        b: Input tensor of shape [dim_0, dim_1, b_last_dim], dtype=bfloat16

    Returns:
        Output tensor of shape [dim_0, dim_1, a_last_dim + b_last_dim], dtype=bfloat16
    N)dtypedevice)torchemptyshaper   r   r   r   )r   r   outr   r   r   r   r   /   s   &r   )r   r   )r   r   r   r   r   r   r   r   )r   r   r   r   r   r   )
__future__r   typingr   r    sglang.jit_kernel.utilsr   r   tvm_ffi.moduler   r   r   r	   r   r   r   r   r   <module>   s    
