o
    پi                     @  s   d dl mZ d dlmZ d dlZd dlmZmZmZm	Z	 er$d dl
mZ edd
dZddddZddddZddddZddddZddddZdS )    )annotations)TYPE_CHECKINGN)KERNEL_PATH
cache_onceload_jitmake_cpp_args)Moduledtypetorch.dtypereturnr   c                 C  s   t | }td d  }tdg|R dgdd| dfdd	| dfd
d| dfdd| dfdd| dfgt|gdS )Ncsrczfast-hadamard-transformhadamardz(fast-hadamard-transform/hadamard_jit.cuhhadamard_transformzHadamardKernel<z>::runhadamard_transform_12nzHadamard12NKernel<hadamard_transform_20nzHadamard20NKernel<hadamard_transform_28nzHadamard28NKernel<hadamard_transform_40nzHadamard40NKernel<)
cuda_filescuda_wrappersextra_include_paths)r   r   resolver   str)r	   argshadamard_include_dir r   N/home/ubuntu/.local/lib/python3.10/site-packages/sglang/jit_kernel/hadamard.py_jit_hadamard_module   s    r         ?xtorch.Tensorscalefloatc                 C  s   | j std|  }| d}| d|} | ddkr!|  } |d dkr5tjj	| dd|d  f} | d}t
| }t| j}|| || |d dkr[|d d d |f }||S )Nz-hadamard_transform only supports CUDA tensors      r   )is_cudaRuntimeErrorsizereshapestride
contiguoustorchnn
functionalpad
empty_liker   r	   r   )r   r    	shapes_ogdim_ogdimoutmoduler   r   r   r       s    




r   c                 C     | j std|  }| d}| d|} | ddkr!|  } d}|| dkr7tjj	| d|||  f} t
| }t| j}|| || || dkrX|d d d |f }||S )Nz1hadamard_transform_12n only supports CUDA tensorsr"   r#   0   r   )r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r   r	   r   r   r    r0   r1   pad_multipler3   r4   r   r   r   r   7       



r   c                 C  r5   )Nz1hadamard_transform_20n only supports CUDA tensorsr"   r#   P   r   )r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r   r	   r   r7   r   r   r   r   N   r9   r   c                 C  r5   )Nz1hadamard_transform_28n only supports CUDA tensorsr"   r#   p   r   )r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r   r	   r   r7   r   r   r   r   e   r9   r   c                 C  r5   )Nz1hadamard_transform_40n only supports CUDA tensorsr"   r#      r   )r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r   r	   r   r7   r   r   r   r   |   r9   r   )r	   r
   r   r   )r   )r   r   r    r!   r   r   )
__future__r   typingr   r+   sglang.jit_kernel.utilsr   r   r   r   tvm_ffi.moduler   r   r   r   r   r   r   r   r   r   r   <module>   s    