o
    U۷i$                     @   s`  d dl Z d dlZd dlZd dlZd dlZd dlZdd Ze r!dndZejee	dZ
g dZeD ]
Zee
ee e< q0dd	lmZmZ d
ZejdddejddfddZejfddZee
j_ee_dd Z				d#ddZ				d#ddZee_ee_ dd Z!dd Z"e re!  ne"  ddl#m#Z#m$Z$m%Z% ddl&m'Z' d dl(m)Z) de*d e)fd!d"Z+dS )$    Nc                   C   s   t jdS )Nwin)sysplatform
startswith r   r   D/home/ubuntu/vllm_env/lib/python3.10/site-packages/cudnn/__init__.py
is_windows	   s   r   z.Release._compiled_modulez._compiled_module)package)backend_versionbackend_version_stringget_last_error_stringdestroy_handlenorm_forward_phasereduction_modebehavior_note	knob_typecreate_handlecreate_kernel_cachecreate_device_properties
get_streamnumerical_note
set_streambuild_plan_policy	data_typetensor_reordering	heur_modepygraphtensorknobcudnnGraphNotSupportedErrordiagonal_alignmentattention_implementation   )_library_type_is_torch_tensorz1.18.0F c
           
      C   s    | j ||t|||||||	d	S )ag  
    Create a tensor.

    Args:
        dim (List[int]): The dimensions of the tensor.
        stride (List[int]): The strides of the tensor.
        data_type (cudnn.data_type): The data type of the tensor.
        is_virtual (bool): Flag indicating if the tensor is virtual.
        is_pass_by_value (bool): Flag indicating if the tensor is passed by value.
        ragged_offset (cudnn_tensor): The ragged offset tensor.
        reordering_type (cudnn.tensor_reordering): The reordering type of the tensor.
        name (str): The name of the tensor.

    Returns:
        cudnn_tensor: The created tensor.
    )	dimstrider   
is_virtualis_pass_by_valueragged_offsetreordering_typenameuid)_make_tensorr#   )
selfr'   r(   r   r)   r*   r+   r,   r-   r.   r   r   r   _tensor4   s   r1   c                 C   s   |  t|S N)_set_data_typer#   )r0   r   r   r   r   r3   ]   s   r3   c                 C   s*   t | tu r| S t| r|  S t| S r2   )typeintr$   data_ptr_pybind_module_get_data_ptr)input_tensorr   r   r   _library_device_pointerh   s
   
r:   c           	      C   s,   dd |  D }t|}| ||| dS )aQ  
    Execute a cudnn graph.

    Args:
        tensor_to_device_buffer (dict(cudnn_tensor, Union[torch.Tensor, int, __dlpack__])): The dimensions of the tensor.
        workspace (Union[torch.Tensor, int, __dlpack__]): The name of the tensor.
        handle: cudnn_handle created with cudnn.create_handle()
    Returns:
        None
    c                 S   6   i | ]\}}|d urt |tu r|n| t|qS r2   r4   r5   get_uidr:   .0xpointerr   r   r   
<dictcomp>       0z_execute.<locals>.<dictcomp>N)itemsr:   _execute)	r0   tensor_to_device_buffer	workspacehandleoverride_uidsoverride_shapesoverride_stridesuid_to_tensor_pointerworkspace_pointerr   r   r   rE   t   s
   rE   c           
   	   C   s4   dd |  D }t|}	| ||	||||| dS )a  
    Execute a cudnn graph.

    Args:
        tensor_to_device_buffer (dict(cudnn_tensor, Union[torch.Tensor, int, __dlpack__])): The dimensions of the tensor.
        workspace (Union[torch.Tensor, int, __dlpack__]): The name of the tensor.
        index(int): Location of execution plan to use.
        handle: cudnn_handle created with cudnn.create_handle()
    Returns:
        None
    c                 S   r;   r2   r<   r>   r   r   r   rB      rC   z*_execute_plan_at_index.<locals>.<dictcomp>N)rD   r:   _execute_plan_at_index)
r0   rF   rG   indexrH   rI   rJ   rK   rL   rM   r   r   r   rN      s   rN   c                  C   s|   t  tjtdd} | r(t| dksJ dt|  dtj	| d }ntj	d}t
|jtjj}t| d S )Npurelibznvidia/cudnn/bin/cudnn64_9.dllr"   Found z% libcudnn.dll.x in nvidia-cudnn-cuXX.r   zcudnn64_9.dll)globospathjoin	sysconfigget_pathlenctypeswindllLoadLibrarycast_handlec_void_pvaluer7   _set_dlhandle_cudnnlib_pathlibrH   r   r   r   
load_cudnn   s    rd   c                  C   s   t  tjtdd} | st  tjtdd} | r6t| dks.J dt|  dt| d }n$ztd}W n t	yY   ztd	}W n t	yV   d }Y nw Y nw |d urnt
|jtjj}t| d S d S )
NrP   z#nvidia/cudnn/lib/libcudnn.so.*[0-9]z'nvidia/cudnn_jit/lib/libcudnn.so.*[0-9]r"   rQ   z$ libcudnn.so.x in nvidia-cudnn-cuXX.r   zlibcudnn.so.9zlibcudnn.so)rR   rS   rT   rU   rV   rW   rX   rY   CDLL	Exceptionr\   r]   r^   r_   r7   r`   ra   r   r   r   _dlopen_cudnn   s(    rg   )graphjitgraph_cache)Graph)Anyr-   returnc           
   
   C   s,  | dkr"z	ddl m} |W S  ty! } ztd| |d }~ww | dkrDz	ddlm} |W S  tyC } ztd| |d }~ww | dkrfz	dd	lm} |W S  tye } ztd
| |d }~ww | dkrz	ddlm} |W S  ty } ztd| |d }~ww | dkrz	ddlm	} |W S  ty } ztd| |d }~ww | dkrz	ddl
m} |W S  ty } ztd| |d }~ww | dkrz	ddlm} |W S  ty } ztd| |d }~ww | dkrz	ddlm}	 |	W S  ty } ztd| |d }~ww t| )NNSAr"   )rn   z_NSA requires optional dependencies. Install with 'pip install nvidia-cudnn-frontend[cutedsl]': GemmSwigluSm100)ro   zkGemmSwigluSm100 requires optional dependencies. Install with 'pip install nvidia-cudnn-frontend[cutedsl]': gemm_swiglu_wrapper_sm100)rp   zugemm_swiglu_wrapper_sm100 requires optional dependencies. Install with 'pip install nvidia-cudnn-frontend[cutedsl]': GemmAmaxSm100)rq   ziGemmAmaxSm100 requires optional dependencies. Install with 'pip install nvidia-cudnn-frontend[cutedsl]': gemm_amax_wrapper_sm100)rr   zsgemm_amax_wrapper_sm100 requires optional dependencies. Install with 'pip install nvidia-cudnn-frontend[cutedsl]': grouped_gemm)rs   zhgrouped_gemm requires optional dependencies. Install with 'pip install nvidia-cudnn-frontend[cutedsl]': GroupedGemmSwigluSm100)rt   zrGroupedGemmSwigluSm100 requires optional dependencies. Install with 'pip install nvidia-cudnn-frontend[cutedsl]': !grouped_gemm_swiglu_wrapper_sm100)ru   z}grouped_gemm_swiglu_wrapper_sm100 requires optional dependencies. Install with 'pip install nvidia-cudnn-frontend[cutedsl]': )native_sparse_attentionrn   rf   ImportErrorgemm_swigluro   rp   	gemm_amaxrq   rr   r%   rs   rt   ru   AttributeError)
r-   _NSAe_GemmSwigluSm100_gemm_swiglu_wrapper_sm100_GemmAmaxSm100_gemm_amax_wrapper_sm100_grouped_gemm_GroupedGemmSwigluSm100"_grouped_gemm_swiglu_wrapper_sm100r   r   r   __getattr__   s   
r   )NNNN),rY   rR   rS   r   rV   	importlibr   module_nameimport_module__name__r7   symbols_to_importsymbol_namegetattrglobals	datatypesr#   r$   __version__r   NOT_SETr   NONEr1   r3   r   set_data_typer   r:   rE   rN   executeexecute_plan_at_indexrd   rg   rh   ri   rj   wrapperrk   typingrl   strr   r   r   r   r   <module>   s`    
+

 
%