o
    ۾i                  
   @   s   d dl Z d dlmZ d dlmZmZmZ d dlZd dlZdddZ	dddZ
dd	d
Ze	  e
  e  eedo>eejdZer[ejd	ddejdejdeej ddfddZ	ddejdejdeeee ejf  ddfddZdS )    N)suppress)ListOptionalUnionreturnc                  C   s   ddl } ddl}ddl}ddlm} tj std|	d}|du r`|j
d|j
dd}|du rN|jdkrL|  d	}t|dkrGd
}n|d }nd}|du rVtdt||d d }|j|smtd| dS )z=check if nvcc is available and if pytorch will likely find itr   NPathz CUDA is not available in PyTorchnvcc	CUDA_HOME	CUDA_PATHntz7C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v*.* z/usr/local/cudazNo CUDA toolchain foundbinznvcc compiler not found at )globosshutilpathlibr   torchcudais_availableImportErrorwhichenvirongetnamelenstrpathexists)r   r   r   r   	nvcc_path	cuda_home
cuda_homes r"   e/home/ubuntu/.local/lib/python3.10/site-packages/xgrammar/kernels/apply_token_bitmask_inplace_cuda.py_check_cuda_toolchain   s,   




r$   c               	   C   sN   g d} | D ]}t t tjjj| W d    n1 sw   Y  qd S )N)z-D__CUDA_NO_HALF_OPERATORS__z-D__CUDA_NO_HALF_CONVERSIONS__z"-D__CUDA_NO_BFLOAT16_CONVERSIONS__z-D__CUDA_NO_HALF2_OPERATORS__)r   
ValueErrorr   utilscpp_extensionCOMMON_NVCC_FLAGSremove)REMOVE_NVCC_FLAGSflagr"   r"   r#   _remove_torch_nvcc_flags<   s   
r,   c               	   C   s   ddl m}  | td}t|}| }W d    n1 s w   Y  dg}t dkr3|d g d}t	j
jjdg |g||d	d
d d S )Nr   r   z.cu-O3Windowsz-Wno-switch-bool)r-   z
-std=c++17z	--threads4z-use_fast_mathxgrammarTF)r   cpp_sourcescuda_sourcesextra_cflagsextra_cuda_cflags	with_cudais_python_module)r   r   __file__with_suffixopenreadplatformsystemappendr   r&   r'   load_inline)r   torch_op_file_pathfsourcecflagscuda_cflagsr"   r"   r#   _load_torch_opsH   s$   



rD   libraryregister_fakez*xgrammar::apply_token_bitmask_inplace_cudalogitsbitmaskindicesc                 C   s   d S Nr"   rG   rH   rI   r"   r"   r#   _g   s   rL   c                 C   sH   t |trtj|tj| jd}|d ur|| j}tjj	| || d S )N)dtypedevice)

isinstancelistr   tensorint32rN   toopsr0    apply_token_bitmask_inplace_cudarK   r"   r"   r#   rU   n   s
   
rU   )r   NrJ   )r;   
contextlibr   typingr   r   r   r   torch.utils.cpp_extensionr$   r,   rD   hasattrrE   _is_register_fake_availablerF   TensorrL   intrU   r"   r"   r"   r#   <module>   sD   

$

	