o
    پi
                     @  s   d dl mZ d dlmZ d dlZd dlmZmZ d dlm	Z	 er&d dl
mZ edd	d
Ze	dddgd	ddddZe	ddgd	dd ddZ	dd!ddZdS )"    )annotations)TYPE_CHECKINGN)
cache_onceload_jit)register_custom_op)Modulereturnr   c                   C  s   t ddgdgdS )Nrotary_embeddingzelementwise/pos_enc.cuh)r	   zRotaryEmbeddingKernel::run)
cuda_filescuda_wrappers)r    r   r   M/home/ubuntu/.local/lib/python3.10/site-packages/sglang/jit_kernel/pos_enc.py_jit_rotary_embedding_module   s
   r   rotary_embedding_with_keyquerykey)op_namemutates_argsT	positionstorch.Tensor	head_sizeintcos_sin_cacheis_neoxboolNonec                 C  s   t  }|| ||||| dS )a$  
    Apply rotary embedding to query and key tensors.

    Args:
        positions: Position indices of shape [num_tokens] or [batch_size, seq_len]
        query: Query tensor of shape [num_tokens, num_heads, head_size] or [num_tokens, num_heads * head_size]
        key: Key tensor of shape [num_tokens, num_kv_heads, head_size] or [num_tokens, num_kv_heads * head_size]
        cos_sin_cache: Cosine and sine cache of shape [max_position, rot_dim]
        is_neox: Whether to use GPT-NeoX style rotary embedding (True) or GPT-J style (False)
    Nr   r	   )r   r   r   r   r   r   moduler   r   r   r      s   rotary_embedding_without_keyc                 C  s   t  }|| |d ||| d S Nr   )r   r   r   r   r   r   r   r   r   r   7   s   c                 C  s:   |d u rt | |||| ||fS t| ||||| ||fS r   )r   r   )r   r   r   r   r   r   r   r   r   r	   F   s   
r	   )r   r   )T)r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   )
__future__r   typingr   torchsglang.jit_kernel.utilsr   r   sglang.srt.utils.custom_opr   tvm_ffi.moduler   r   r   r   r	   r   r   r   r   <module>   s.    	