o
    پiO                  
   @  sR  U d dl mZ d dlZd dlZd dlZd dlmZmZmZm	Z	m
Z
mZmZmZ d dlZer3d dlmZ ededef dZd7d
dZd8ddZed9ddZe Zeed gZddgZg dZg Zeeeeejf Zde d< G dd de!e Z"ejdej#dej$diZ%d:d d!Z&dddddddddd"	d;d0d1Z'ed<d3d4Z(ed=d5d6Z)dS )>    )annotationsN)TYPE_CHECKINGAnyCallableListTuple	TypeAliasTypeVarUnion)ModuleF.)boundfnreturnc                   s    i t   fdd}|S )z
    NOTE: `functools.lru_cache` is not compatible with `torch.compile`
    So we manually implement a simple cache_once decorator to replace it.
    c                    s>   | t t| dd df}|vr | i ||< | S )Nc                 S  s   | d S )Nr    )xr   r   K/home/ubuntu/.local/lib/python3.10/site-packages/sglang/jit_kernel/utils.py<lambda>   s    z-cache_once.<locals>.wrapper.<locals>.<lambda>)key)tuplesorteditems)argskwargsr   r   
result_mapr   r   wrapper   s   zcache_once.<locals>.wrapper)	functoolswraps)r   r   r   r   r   
cache_once   s   r   tupTuple[str, str]strc                 C  s   | \}}d| d| dS )NzTVM_FFI_DLL_EXPORT_TYPED_FUNC(z, (z));r   )r    export_namekernel_namer   r   r   _make_wrapper"   s   r%   pathlib.Pathc                    sD   t tj   fdd} dd }|  p| }|d u r td|S )Nc                    s(      } | d  r| d  r| S d S )Nincludecsrc)resolveexists)	candidatecur_dirr   r   _environment_install,   s   z2_resolve_kernel_path.<locals>._environment_installc                   S  s   d S Nr   r   r   r   r   _package_install2   s   z._resolve_kernel_path.<locals>._package_installzCannot find sgl-kernel/jit path)pathlibPath__file__parentr)   RuntimeError)r.   r0   pathr   r,   r   _resolve_kernel_path'   s   r7   r'   
-std=c++20-O3)r8   r9   z--expt-relaxed-constexprr   CPP_TEMPLATE_TYPEc                   @  s   e Zd ZdddZdS )
CPPArgListr   r"   c                 C  s
   d | S )Nz, )join)selfr   r   r   __str__E   s   
zCPPArgList.__str__Nr   r"   )__name__
__module____qualname__r>   r   r   r   r   r;   D   s    r;   fp32_tfp16_tbf16_tr   c                    s    d	dd t  fdd| D S )
Nargr:   r   r"   c                 S  sR   t | tr| r	dS dS t | ttfrt| S t | tjr t|  S tdt	|  )Ntruefalsez,Unsupported argument type for cpp template: )

isinstanceboolintfloatr"   torchdtypeCPP_DTYPE_MAP	TypeErrortype)rF   r   r   r   _convertQ   s   
zmake_cpp_args.<locals>._convertc                 3  s    | ]} |V  qd S r/   r   .0rF   rR   r   r   	<genexpr>Z       z make_cpp_args.<locals>.<genexpr>)rF   r:   r   r"   )r;   )r   r   rU   r   make_cpp_argsP   s   
	rX   )		cpp_files
cuda_filescpp_wrapperscuda_wrappersextra_cflagsextra_cuda_cflagsextra_ldflagsextra_include_pathsbuild_directoryrY   List[str] | NonerZ   r[   List[Tuple[str, str]] | Noner\   r]   r^   r_   r`   ra   
str | Noner   c        	      
   G  s"  ddl m}
 | p	g } |pg }|pg }|pg }|pg }|pg }|p!g }|p%g }dd | D }dd |D }|dd |D 7 }dd |D }dd |D }|d	d |D 7 }d
}|tjv }|sct tj|< z&|
dddd |	D  ||t| t| t| t	| |dW |stj|= S S |stj|= w )a{  
    Loading a JIT module from C++/CUDA source files.
    We define a wrapper as a tuple of (export_name, kernel_name),
    where `export_name` is the name used to called from Python,
    and `kernel_name` is the name of the kernel class in C++/CUDA source.

    :param args: Unique marker of the JIT module. Must be distinct for different kernels.
    :type args: str
    :param cpp_files: A list of C++ source files.
    :type cpp_files: List[str] | None
    :param cuda_files: A list of CUDA source files.
    :type cuda_files: List[str] | None
    :param cpp_wrappers: A list of C++ wrappers, defining the export name and kernel name.
    :type cpp_wrappers: List[Tuple[str, str]] | None
    :param cuda_wrappers: A list of CUDA wrappers, defining the export name and kernel name.
    :type cuda_wrappers: List[Tuple[str, str]] | None
    :param extra_cflags: Extra C++ compiler flags.
    :type extra_cflags: List[str] | None
    :param extra_cuda_cflags: Extra CUDA compiler flags.
    :type extra_cuda_cflags: List[str] | None
    :param extra_ldflags: Extra linker flags.
    :type extra_ldflags: List[str] | None
    :param extra_include_paths: Extra include paths.
    :type extra_include_paths: List[str] | None
    :param build_directory: The build directory for JIT compilation.
    :type build_directory: str | None
    :return: A just-in-time(JIT) compiled module.
    :rtype: Module
    r   )load_inlinec                 S     g | ]
}t d  |  qS r(   KERNEL_PATHr)   rT   fr   r   r   
<listcomp>       zload_jit.<locals>.<listcomp>c                 S     g | ]}d | dqS z
#include ""r   rT   r6   r   r   r   rl          c                 S     g | ]}t |qS r   r%   rT   r    r   r   r   rl          c                 S  rf   rg   rh   rj   r   r   r   rl      rm   c                 S  rn   ro   r   rq   r   r   r   rl      rr   c                 S  rs   r   rt   ru   r   r   r   rl      rv   TVM_FFI_CUDA_ARCH_LISTsgl_kernel_jit__c                 s  s    | ]}t |V  qd S r/   )r"   rS   r   r   r   rV      rW   zload_jit.<locals>.<genexpr>)cpp_sourcescuda_sourcesr]   r^   r_   r`   ra   )
tvm_ffi.cppre   osenviron_get_cuda_arch_listr<   DEFAULT_CFLAGSDEFAULT_CUDA_CFLAGSDEFAULT_LDFLAGSDEFAULT_INCLUDE)rY   rZ   r[   r\   r]   r^   r_   r`   ra   r   re   	cpp_pathsrz   
cuda_pathsr{   env_keyenv_existedr   r   r   load_jit]   sD   *


r   rJ   c                  C  s&   dd l } | j }| j|d dkS )Nr   	   rM   cudacurrent_deviceget_device_capability)rM   devicer   r   r   is_arch_support_pdl   s   
r   c                  C  s(   t j } t j| \}}| d| S )zDGet the correct CUDA architecture string for TVM_FFI_CUDA_ARCH_LIST..r   )r   majorminorr   r   r   r      s   
r   )r   r   r   r   )r    r!   r   r"   )r   r&   )r   r:   r   r;   )r   r"   rY   rb   rZ   rb   r[   rc   r\   rc   r]   rb   r^   rb   r_   rb   r`   rb   ra   rd   r   r   )r   rJ   r?   )*
__future__r   r   r}   r1   typingr   r   r   r   r   r   r	   r
   rM   tvm_ffir   r   r   r%   r7   ri   r"   r   r   r   r   rK   rL   rJ   rN   r:   __annotations__listr;   float16bfloat16rO   rX   r   r   r   r   r   r   r   <module>   sN    (


U