o
    پik                     @  s   d dl mZ d dlZd dlmZmZ d dlZd dlmZm	Z	m
Z
mZ er*d dlmZ ed/ddZed0ddZed1ddZed1ddZed2ddZ	d3d dd4d"d#Z		d5d6d(d)Z	d3d7d+d,Z	d3d8d-d.ZdS )9    )annotationsN)TYPE_CHECKINGOptional)
cache_onceis_arch_support_pdlload_jitmake_cpp_args)Modulehead_dimintdtypetorch.dtypereturnr	   c                 C  6   t | t |}tdg|R dgdd| dfgdS )Nqknormzelementwise/qknorm.cuhzQKNormKernel<>::run
cuda_filescuda_wrappersr   r   r   )r
   r   args r   J/home/ubuntu/.local/lib/python3.10/site-packages/sglang/jit_kernel/norm.py_jit_qknorm_module      r   hidden_sizec                 C  r   )Nrmsnormzelementwise/rmsnorm.cuhzRMSNormKernel<r   r   r   )r   r   r   r   r   r   _jit_rmsnorm_module   r   r   c                 C  0   t | }tdg|R dgdd| dfgdS )Nfused_add_rmsnormz!elementwise/fused_add_rmsnorm.cuhzFusedAddRMSNormKernel<r   r   r   r   r   r   r   r   r   _jit_fused_add_rmsnorm_module)   s   r"   c                 C  r   )Nqknorm_across_headsz#elementwise/qknorm_across_heads.cuhzQKNormAcrossHeadsKernel<r   r   r    r!   r   r   r   _jit_qknorm_across_heads_module4   s   r$   boolc              
   C  sp   t t}| dvr|d|  d dS zt| | W dS  ty7 } z|d|  W Y d }~dS d }~ww )N)@         i   i   zUnsupported head_dim=z for JIT QK-Norm kernelFTz#Failed to load JIT QK-Norm kernel: )logging	getLogger__name__warningr   	Exception)r
   r   loggerer   r   r   can_use_fused_inplace_qknormA   s   

r0   ư>)r
   qtorch.Tensorkq_weightk_weightepsfloatNonec                C  s0   |p|  d}t|| j}|| |||| d S N)sizer   r   r   )r2   r4   r5   r6   r7   r
   moduler   r   r   fused_inplace_qknormO   s   	r>   inputweightoutputOptional[torch.Tensor]c                 C  s:   |d ur|n| }|  d}t|| j}|| ||| d S r:   )r<   r   r   r   )r?   r@   rA   r7   r   r=   r   r   r   r   ]   s   
r   residualc                 C  s   t | j}|| ||| d S )N)r"   r   r   )r?   rC   r@   r7   r=   r   r   r   r   i   s   
r   c                 C  s    t | j}|| |||| dS )a  
    Fused inplace QK normalization across all heads.

    Args:
        q: Query tensor of shape [batch_size, num_heads * head_dim]
        k: Key tensor of shape [batch_size, num_heads * head_dim]
        q_weight: Query weight tensor of shape [num_heads * head_dim]
        k_weight: Key weight tensor of shape [num_heads * head_dim]
        eps: Epsilon for numerical stability
    N)r$   r   r#   )r2   r4   r5   r6   r7   r=   r   r   r   !fused_inplace_qknorm_across_headss   s   
rD   )r
   r   r   r   r   r	   )r   r   r   r   r   r	   )r   r   r   r	   )r
   r   r   r   r   r%   )r1   )r2   r3   r4   r3   r5   r3   r6   r3   r7   r8   r
   r   r   r9   )Nr1   )
r?   r3   r@   r3   rA   rB   r7   r8   r   r9   )
r?   r3   rC   r3   r@   r3   r7   r8   r   r9   )r2   r3   r4   r3   r5   r3   r6   r3   r7   r8   r   r9   )
__future__r   r)   typingr   r   torchsglang.jit_kernel.utilsr   r   r   r   tvm_ffi.moduler	   r   r   r"   r$   r0   r>   r   r   rD   r   r   r   r   <module>   s8    


