o
    پi!                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
 d dlZeeZdd Zdd Zdd	 Zd
d Zdd ZdS )    N)Path)Listc                  C   s4   t j sdS t j } t j| }|jd |j S )z.Get the compute capability of the current GPU.N
   )torchcudais_availablecurrent_deviceget_device_propertiesmajorminor)device
properties r   I/home/ubuntu/.local/lib/python3.10/site-packages/sgl_kernel/load_utils.py_get_compute_capability   s
   

r   c                    sV   g d}g }g }| D ]}t | t fdd|D r!|| q
|| q
|| S )zCFilter and prioritize compiled extensions over Python source files.)z.soz.pydz.dllc                 3   s*    | ]}t  |p|t  v V  qd S )N)strendswith).0extpathr   r   	<genexpr>%   s    
z._filter_compiled_extensions.<locals>.<genexpr>)r   anyappend)	file_listcompiled_extensionscompiled_filesother_files	file_pathr   r   r   _filter_compiled_extensions   s   r   c                  C   s  t  } td|   ttj}td|  | dkr!d}d}n| dur.d}d|  d	}nd}d
}t|| d }t|}t|}td|  td|  td|  td|  g }|rt|d }td|  zIt	j
dt|}	|	du rtd| t	j
|	}
|	jdu rtd| td| d |	j|
 td|  td|
j  |
W S  ty } z|| td| dt|j d|  W Y d}~nd}~ww td|  t|d }t|}t|}td|  td|  td|  |rt|d }td|  zHt	j
dt|}	|	du r=td| t	j
|	}
|	jdu rPtd| td | d |	j|
 td! td|
j  |
W S  ty } z|| td"| dt|j d|  W Y d}~nd}~ww td#|  td$ zddl}
td% td|
j  |
W S  ty } z|| td&|  W Y d}~nd}~ww d'd(d) |D }d*| d+| d,| d+| d-|  d.| d/| d'}t| t|)0zBLoad the appropriate common_ops library based on GPU architecture.z1[sgl_kernel] GPU Detection: compute_capability = z#[sgl_kernel] sgl_kernel directory: Z   sm90z.SM90 (Hopper/H100 with fast math optimization)Nsm100SMz! (precise math for compatibility)z(CPU/No GPU detected (using precise math)zcommon_ops.*z [sgl_kernel] Attempting to load z3[sgl_kernel] Looking for library matching pattern: z[sgl_kernel] Found files: z [sgl_kernel] Prioritized files: r   z2[sgl_kernel] Found architecture-specific library: 
common_opsz!Could not create module spec for zModule spec has no loader for z![sgl_kernel] Loading module from z...u%   [sgl_kernel] ✓ Successfully loaded u   [sgl_kernel] ✓ Module file: u%   [sgl_kernel] ✗ Failed to load from : uK   [sgl_kernel] ✗ Architecture-specific library not found matching pattern: z6[sgl_kernel] Attempting fallback: looking for pattern z#[sgl_kernel] Found fallback files: z)[sgl_kernel] Prioritized fallback files: z%[sgl_kernel] Found fallback library: z*[sgl_kernel] Loading fallback module from u5   [sgl_kernel] ✓ Successfully loaded fallback libraryu.   [sgl_kernel] ✗ Failed to load fallback from u>   [sgl_kernel] ✗ Fallback library not found matching pattern: zF[sgl_kernel] Final attempt: trying standard Python import 'common_ops'uA   [sgl_kernel] ✓ Successfully imported via standard Python importu0   [sgl_kernel] ✗ Standard Python import failed: 
c                 s   s&    | ]}d t |j d| V  qdS )z- r%   N)type__name__)r   errr   r   r   r      s    
z2_load_architecture_specific_ops.<locals>.<genexpr>zw
[sgl_kernel] CRITICAL: Could not load any common_ops library!

Attempted locations:
1. Architecture-specific pattern: z - found files: z
2. Fallback pattern: zQ
3. Standard Python import: common_ops - failed

GPU Info:
- Compute capability: z
- Expected variant: z

Please ensure sgl_kernel is properly installed with:
pip install --upgrade sgl_kernel

Error details from previous import attempts:
)r   loggerdebugr   __file__parentr   globr   	importlibutilspec_from_file_locationImportErrormodule_from_specloaderexec_module	Exceptionr   r'   r(   r$   join)compute_capabilitysgl_kernel_dir
ops_subdirvariant_nameops_patternraw_matching_filesmatching_filesprevious_import_errorsops_pathspecr$   ealt_patternraw_alt_filesalt_matching_filesalt_pathattempt_error_msg	error_msgr   r   r   _load_architecture_specific_ops0   s   










	

rI   c                  C   sR   t jdpt jd} | du r'td}|dur%t jt j|} | S d} | S )zFind the CUDA install path.	CUDA_HOME	CUDA_PATHNnvccz/usr/local/cuda)osenvirongetshutilwhichr   dirname)	cuda_home	nvcc_pathr   r   r   _find_cuda_home   s   
rU   c                  C   s   t t } | d | d t dt dt dt dg}|D ]B}|d }| r]z| }tjt|tjd t	d	|  W  d
S  t
y\ } zt	d| d|  W Y d
}~qd
}~ww qt	d d
S )zRPreload the CUDA runtime library to help avoid 'libcudart.so.12 not found' issues.liblib64z/usr/lib/x86_64-linux-gnuz/usr/lib/aarch64-linux-gnuz
/usr/lib64z/usr/libzlibcudart.so.12)modezPreloaded CUDA runtime under NzFailed to load r%   z3[sgl_kernel] Could not preload CUDA runtime library)r   rU   existsresolvectypesCDLLr   RTLD_GLOBALr*   r+   r6   )rS   candidate_dirsbase	candidatecuda_runtime_librB   r   r   r   _preload_cuda_library   s.   
	
rb   )r[   r.   importlib.utilr/   loggingrM   rP   pathlibr   typingr   r   	getLoggerr(   r*   r   r   rI   rU   rb   r   r   r   r   <module>   s     
 