o
    Y۷iZ                     @   s  d dl Z d dlmZ d dlmZmZmZmZmZm	Z	m
Z
mZ d dlmZmZmZmZmZmZ d dlmZ d dlmZ d dlmZ d dlmZ e ZejZejZejZee G d	d
 d
eZeG dd deZ eG dd deZ!eG dd deZ"eG dd deZ#eG dd deZ$eG dd deZ%eG dd deZ&eG dd deZ'eG dd deZ(eG dd deZ)eG dd  d eZ*eG d!d" d"eZ+eG d#d$ d$eZ,eG d%d& d&eZ-eG d'd( d(eZ.eG d)d* d*eZ/eG d+d, d,eZ0eG d-d. d.eZ1eG d/d0 d0eZ2eG d1d2 d2eZ3eG d3d4 d4eZ4d5d6 Z5d7d8 Z6d9d: Z7ee8G d;d< d<eZ9d=d> Z:d?d@ Z;dAdB Z<dCdD Z=e7ej>j?Z@e=e jAZBe=e jCZDe7ej>jEZFe=e jGZHe=e jIZJe7ej>jKZLe=e jMZNe=e jOZPe7ej>jQZRe7ej>jSZTe5ej>jUZVe6e jWZXe5ej>jYZZe6e[Z\e:ej>j]Z^e<e j_ e:ej>j`Zae<e jb e:ej>jcZde<e je e:ej>jfZge<e jh e:ej>jiZje<e jk e:ej>jlZme<e jn e=e jo e=e jp dEdF ZqdGdH ZreqdIZseqdJZteqdKZueqdLZveqdMZweqdNZxeqdOZyeqdPZzeqdQZ{eqdRZ|eqdSZ}eqdTZ~eqdUZeqdVZeqdWZerdXZdYdZ ZejejejejejejfZejejejejfZejejfZeejjAeZeejjGeZeejjeZeejjeZeejjeZeejjeZeejjeZeejjeZeejjeZeejjeZeejjeZeejjeZeG d[d\ d\eZeG d]d^ d^eZeG d_d` d`eZeG dadb dbeZeG dcdd ddeZeG dedf dfeZeG dgdh dheZeG didj djeZeG dkdl dleZeG dmdn dneZeeee eD ]Zeee q!e	D ]Zeee q,eD ]Zeee q7e
D ]Zedov rNeee qBdS )p    N)types)parse_dtypeparse_shaperegister_number_classesregister_numpy_ufunctrigonometric_functionscomparison_functionsmath_operationsbit_twiddling_functions)AttributeTemplateConcreteTemplateAbstractTemplateCallableTemplate	signatureRegistrydim3)
Conversion)cuda) declare_device_function_templatec                   @      e Zd Zdd ZdS )Cuda_array_declc                 C      dd }|S )Nc                 S   s   t | tjrt | tjsd S nt | tjtjfr$tdd | D r#d S nd S t| }t|}|d ur>|d ur@tj	||ddS d S d S )Nc                 S   s   g | ]	}t |tj qS  )
isinstancer   IntegerLiteral).0sr   r   I/home/ubuntu/vllm_env/lib/python3.10/site-packages/numba/cuda/cudadecl.py
<listcomp>$   s    z:Cuda_array_decl.generic.<locals>.typer.<locals>.<listcomp>C)dtypendimlayout)
r   r   Integerr   TupleUniTupleanyr   r   Array)shaper!   r"   nb_dtyper   r   r   typer   s    z&Cuda_array_decl.generic.<locals>.typerr   selfr+   r   r   r   generic   s   zCuda_array_decl.genericN__name__
__module____qualname__r.   r   r   r   r   r      s    r   c                   @      e Zd ZejjZdS )Cuda_shared_arrayN)r0   r1   r2   r   sharedarraykeyr   r   r   r   r4   2       r4   c                   @   r3   )Cuda_local_arrayN)r0   r1   r2   r   localr6   r7   r   r   r   r   r9   7   r8   r9   c                   @      e Zd ZejjZdd ZdS )Cuda_const_array_likec                 C   r   )Nc                 S   s   | S Nr   )ndarrayr   r   r   r+   A      z,Cuda_const_array_like.generic.<locals>.typerr   r,   r   r   r   r.   @   s   zCuda_const_array_like.genericN)r0   r1   r2   r   const
array_liker7   r.   r   r   r   r   r<   <       r<   c                   @      e Zd ZejZeejgZ	dS )Cuda_threadfence_deviceN)
r0   r1   r2   r   threadfencer7   r   r   nonecasesr   r   r   r   rD   F       rD   c                   @   rC   )Cuda_threadfence_blockN)
r0   r1   r2   r   threadfence_blockr7   r   r   rF   rG   r   r   r   r   rI   L   rH   rI   c                   @   rC   )Cuda_threadfence_systemN)
r0   r1   r2   r   threadfence_systemr7   r   r   rF   rG   r   r   r   r   rK   R   rH   rK   c                   @   s*   e Zd ZejZeejeejej	gZ
dS )Cuda_syncwarpN)r0   r1   r2   r   syncwarpr7   r   r   rF   i4rG   r   r   r   r   rM   X   s    rM   c                
   @   s   e Zd ZejZeeej	ej
fej	ej	ej	ej	ej	eeejej
fej	ej	ejej	ej	eeejej
fej	ej	ejej	ej	eeejej
fej	ej	ejej	ej	gZdS )Cuda_shfl_sync_intrinsicN)r0   r1   r2   r   shfl_sync_intrinsicr7   r   r   r%   rO   b1i8f4f8rG   r   r   r   r   rP   ^   s    rP   c                   @   s6   e Zd ZejZeeej	ej
fej	ej	ej
gZdS )Cuda_vote_sync_intrinsicN)r0   r1   r2   r   vote_sync_intrinsicr7   r   r   r%   rO   rR   rG   r   r   r   r   rV   m   s
    
rV   c                   @   sV   e Zd ZejZeejejejeejejej	eejejej
eejejejgZdS )Cuda_match_any_syncN)r0   r1   r2   r   match_any_syncr7   r   r   rO   rS   rT   rU   rG   r   r   r   r   rX   t   s    rX   c                   @   s   e Zd ZejZeeej	ej
fej	ej	eeej	ej
fej	ejeeej	ej
fej	ejeeej	ej
fej	ejgZdS )Cuda_match_all_syncN)r0   r1   r2   r   match_all_syncr7   r   r   r%   rO   rR   rS   rT   rU   rG   r   r   r   r   rZ      s    rZ   c                   @   rC   )Cuda_activemaskN)
r0   r1   r2   r   
activemaskr7   r   r   uint32rG   r   r   r   r   r\      rH   r\   c                   @   rC   )Cuda_lanemask_ltN)
r0   r1   r2   r   lanemask_ltr7   r   r   r^   rG   r   r   r   r   r_      rH   r_   c                
   @   z   e Zd ZdZejZeej	ej	eej
ej
eejejeejejeejejeejejeejejeejejgZdS )	Cuda_popcz
    Supported types from `llvm.popc`
    [here](http://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#bit-manipulations-intrinics)
    N)r0   r1   r2   __doc__r   popcr7   r   r   int8int16int32int64uint8uint16r^   uint64rG   r   r   r   r   rb          rb   c                   @   sB   e Zd ZdZejZeej	ej	ej	ej	eej
ej
ej
ej
gZdS )Cuda_fmaz
    Supported types from `llvm.fma`
    [here](https://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#standard-c-library-intrinics)
    N)r0   r1   r2   rc   r   fmar7   r   r   float32float64rG   r   r   r   r   rm      s    rm   c                   @   s,   e Zd ZejjZeej	ej	ej	ej	gZ
dS )	Cuda_hfmaN)r0   r1   r2   r   fp16hfmar7   r   r   float16rG   r   r   r   r   rq      s    rq   c                   @   .   e Zd ZejZeejejeej	ej	gZ
dS )	Cuda_cbrtN)r0   r1   r2   r   cbrtr7   r   r   ro   rp   rG   r   r   r   r   rv      s
    rv   c                   @   ru   )	Cuda_brevN)r0   r1   r2   r   brevr7   r   r   r^   rk   rG   r   r   r   r   rx      s
    rx   c                
   @   ra   )Cuda_clzz
    Supported types from `llvm.ctlz`
    [here](http://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#bit-manipulations-intrinics)
    N)r0   r1   r2   rc   r   clzr7   r   r   re   rf   rg   rh   ri   rj   r^   rk   rG   r   r   r   r   rz      rl   rz   c                
   @   sz   e Zd ZdZejZeej	ej
eej	ejeej	ejeej	ejeej	ejeej	ejeej	ej	eej	ejgZdS )Cuda_ffsz
    Supported types from `llvm.cttz`
    [here](http://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#bit-manipulations-intrinics)
    N)r0   r1   r2   rc   r   ffsr7   r   r   r^   re   rf   rg   rh   ri   rj   rk   rG   r   r   r   r   r|      rl   r|   c                   @   s   e Zd ZejZdd ZdS )	Cuda_selpc                 C   sX   |rJ |\}}}t jt jt jt jt jt jt jt jf}||ks#||vr%d S t	||||S r=   )
r   rp   ro   rf   rj   rg   r^   rh   rk   r   )r-   argskwstestabsupported_typesr   r   r   r.      s   
zCuda_selp.genericN)r0   r1   r2   r   selpr7   r.   r   r   r   r   r~      s    r~   c                       t G  fdddt}|S )Nc                       s    e Zd Z ZeejejgZdS )z'_genfp16_unary.<locals>.Cuda_fp16_unaryNr0   r1   r2   r7   r   r   rt   rG   r   l_keyr   r   Cuda_fp16_unary  s    r   registerr   r   r   r   r   r   _genfp16_unary     r   c                    s    t  G  fdddt}|S )Nc                       s   e Zd Z Zdd ZdS )z0_genfp16_unary_operator.<locals>.Cuda_fp16_unaryc                 S   s8   |rJ t |dkr|d tjkrttjtjS d S d S )N   r   )lenr   rt   r   )r-   r   r   r   r   r   r.     s   z8_genfp16_unary_operator.<locals>.Cuda_fp16_unary.genericNr0   r1   r2   r7   r.   r   r   r   r   r     s    r   register_globalr   r   r   r   r   _genfp16_unary_operator  s   r   c                    r   )Nc                       s$   e Zd Z ZeejejejgZdS )z)_genfp16_binary.<locals>.Cuda_fp16_binaryNr   r   r   r   r   Cuda_fp16_binary#  s    r   r   )r   r   r   r   r   _genfp16_binary"  r   r   c                   @   r   )Floatc                 C   s&   |rJ |\}|t jkrt||S d S r=   )r   rt   r   )r-   r   r   argr   r   r   r.   .  s
   

zFloat.genericNr/   r   r   r   r   r   +  s    r   c                    r   )Nc                       s$   e Zd Z ZeejejejgZdS )z1_genfp16_binary_comparison.<locals>.Cuda_fp16_cmpN)	r0   r1   r2   r7   r   r   rR   rt   rG   r   r   r   r   Cuda_fp16_cmp8  s    r   r   )r   r   r   r   r   _genfp16_binary_comparison7  s   r   c                    s"   t  G  fdddt}|S )Nc                          e Zd Z ZfddZdS )z1_fp16_binary_operator.<locals>.Cuda_fp16_operatorc                    s   |rJ t |dkrM|d tjks|d tjkrO|d tjkr+| j|d |d }n| j|d |d }|tjksE|tjksE|tjkrQt	 tjtjS d S d S d S )N   r   r   )
r   r   rt   contextcan_convertr   exactpromotesafer   )r-   r   r   convertible)rettyr   r   r.   T  s   



z9_fp16_binary_operator.<locals>.Cuda_fp16_operator.genericNr   r   r   r   r   r   Cuda_fp16_operatorP      r   r   )r   r   r   r   r   r   _fp16_binary_operatorO  s   r   c                 C      t | tjS r=   )r   r   rR   opr   r   r   _genfp16_comparison_operatorn     r   c                 C   r   r=   )r   r   rt   r   r   r   r   _genfp16_binary_operatorr  r   r   c                 C   s"   t d|  tjtjf}t|S N__numba_wrapper_r   r   rt   Functionfnamedeclr   r   r   _resolve_wrapped_unary  s
   

r   c                 C   s&   t d|  tjtjtjf}t|S r   r   r   r   r   r   _resolve_wrapped_binary  s
   


r   hsinhcoshloghlog10hlog2hexphexp10hexp2hsqrthrsqrthfloorhceilhrcphrinthtrunchdivc                    s   t G  fdddt}|S )Nc                       r   )z_gen.<locals>.Cuda_atomicc                    s^   |rJ |\}}}|j  vrd S |jdkrt|j |tj|j S |jdkr-t|j |||j S d S Nr   )r!   r"   r   r   intp)r-   r   r   aryidxval)r   r   r   r.     s   



z!_gen.<locals>.Cuda_atomic.genericNr   r   r   r   r   r   Cuda_atomic  r   r   )r   r   )r   r   r   r   r   r   _gen  s   r   c                   @   r;   )Cuda_atomic_compare_and_swapc                 C   s@   |rJ |\}}}|j }|tv r|jdkrt||||S d S d S r   )r!   integer_numba_typesr"   r   )r-   r   r   r   oldr   dtyr   r   r   r.     s   
z$Cuda_atomic_compare_and_swap.genericN)r0   r1   r2   r   atomiccompare_and_swapr7   r.   r   r   r   r   r     rB   r   c                   @   r;   )Cuda_atomic_casc                 C   s`   |rJ |\}}}}|j }|tvrd S |jdkr!t||tj||S |jdkr.t|||||S d S r   )r!   r   r"   r   r   r   )r-   r   r   r   r   r   r   r   r   r   r   r.     s   

zCuda_atomic_cas.genericN)r0   r1   r2   r   r   casr7   r.   r   r   r   r   r     rB   r   c                   @   s"   e Zd ZejZeejej	gZ
dS )Cuda_nanosleepN)r0   r1   r2   r   	nanosleepr7   r   r   voidr^   rG   r   r   r   r   r     s    r   c                   @   s(   e Zd ZeZdd Zdd Zdd ZdS )
Dim3_attrsc                 C      t jS r=   r   rg   r-   modr   r   r   	resolve_x
     zDim3_attrs.resolve_xc                 C   r   r=   r   r   r   r   r   	resolve_y  r   zDim3_attrs.resolve_yc                 C   r   r=   r   r   r   r   r   	resolve_z  r   zDim3_attrs.resolve_zN)r0   r1   r2   r   r7   r   r   r   r   r   r   r   r     s
    r   c                   @       e Zd ZeejZdd ZdS )CudaSharedModuleTemplatec                 C   
   t tS r=   )r   r   r4   r   r   r   r   resolve_array     
z&CudaSharedModuleTemplate.resolve_arrayN)	r0   r1   r2   r   Moduler   r5   r7   r   r   r   r   r   r         r   c                   @   r   )CudaConstModuleTemplatec                 C   r   r=   )r   r   r<   r   r   r   r   resolve_array_like   r   z*CudaConstModuleTemplate.resolve_array_likeN)	r0   r1   r2   r   r   r   r@   r7   r   r   r   r   r   r     r   r   c                   @   r   )CudaLocalModuleTemplatec                 C   r   r=   )r   r   r9   r   r   r   r   r   (  r   z%CudaLocalModuleTemplate.resolve_arrayN)	r0   r1   r2   r   r   r   r:   r7   r   r   r   r   r   r   $  r   r   c                   @   s   e Zd ZeejZdd Zdd Z	dd Z
dd Zd	d
 Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd ZdS )CudaAtomicTemplatec                 C   r   r=   )r   r   Cuda_atomic_addr   r   r   r   resolve_add0  r   zCudaAtomicTemplate.resolve_addc                 C   r   r=   )r   r   Cuda_atomic_subr   r   r   r   resolve_sub3  r   zCudaAtomicTemplate.resolve_subc                 C   r   r=   )r   r   Cuda_atomic_andr   r   r   r   resolve_and_6  r   zCudaAtomicTemplate.resolve_and_c                 C   r   r=   )r   r   Cuda_atomic_orr   r   r   r   resolve_or_9  r   zCudaAtomicTemplate.resolve_or_c                 C   r   r=   )r   r   Cuda_atomic_xorr   r   r   r   resolve_xor<  r   zCudaAtomicTemplate.resolve_xorc                 C   r   r=   )r   r   Cuda_atomic_incr   r   r   r   resolve_inc?  r   zCudaAtomicTemplate.resolve_incc                 C   r   r=   )r   r   Cuda_atomic_decr   r   r   r   resolve_decB  r   zCudaAtomicTemplate.resolve_decc                 C   r   r=   )r   r   Cuda_atomic_exchr   r   r   r   resolve_exchE  r   zCudaAtomicTemplate.resolve_exchc                 C   r   r=   )r   r   Cuda_atomic_maxr   r   r   r   resolve_maxH  r   zCudaAtomicTemplate.resolve_maxc                 C   r   r=   )r   r   Cuda_atomic_minr   r   r   r   resolve_minK  r   zCudaAtomicTemplate.resolve_minc                 C   r   r=   )r   r   Cuda_atomic_nanminr   r   r   r   resolve_nanminN  r   z!CudaAtomicTemplate.resolve_nanminc                 C   r   r=   )r   r   Cuda_atomic_nanmaxr   r   r   r   resolve_nanmaxQ  r   z!CudaAtomicTemplate.resolve_nanmaxc                 C   r   r=   )r   r   r   r   r   r   r   resolve_compare_and_swapT  r   z+CudaAtomicTemplate.resolve_compare_and_swapc                 C   r   r=   )r   r   r   r   r   r   r   resolve_casW  r   zCudaAtomicTemplate.resolve_casN)r0   r1   r2   r   r   r   r   r7   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r   r   r   r   r   ,  s     r   c                   @   s  e Zd ZeejZdd Zdd Z	dd Z
dd Zd	d
 Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Zd1d2 Z d3d4 Z!d5d6 Z"d7d8 Z#d9d: Z$d;d< Z%d=S )>CudaFp16Templatec                 C   r   r=   )r   r   	Cuda_haddr   r   r   r   resolve_hadd_  r   zCudaFp16Template.resolve_haddc                 C   r   r=   )r   r   	Cuda_hsubr   r   r   r   resolve_hsubb  r   zCudaFp16Template.resolve_hsubc                 C   r   r=   )r   r   	Cuda_hmulr   r   r   r   resolve_hmule  r   zCudaFp16Template.resolve_hmulc                 C      t S r=   )hdiv_devicer   r   r   r   resolve_hdivh  r?   zCudaFp16Template.resolve_hdivc                 C   r   r=   )r   r   	Cuda_hnegr   r   r   r   resolve_hnegk  r   zCudaFp16Template.resolve_hnegc                 C   r   r=   )r   r   	Cuda_habsr   r   r   r   resolve_habsn  r   zCudaFp16Template.resolve_habsc                 C   r   r=   )r   r   rq   r   r   r   r   resolve_hfmaq  r   zCudaFp16Template.resolve_hfmac                 C   r  r=   )hsin_devicer   r   r   r   resolve_hsint  r?   zCudaFp16Template.resolve_hsinc                 C   r  r=   )hcos_devicer   r   r   r   resolve_hcosw  r?   zCudaFp16Template.resolve_hcosc                 C   r  r=   )hlog_devicer   r   r   r   resolve_hlogz  r?   zCudaFp16Template.resolve_hlogc                 C   r  r=   )hlog10_devicer   r   r   r   resolve_hlog10}  r?   zCudaFp16Template.resolve_hlog10c                 C   r  r=   )hlog2_devicer   r   r   r   resolve_hlog2  r?   zCudaFp16Template.resolve_hlog2c                 C   r  r=   )hexp_devicer   r   r   r   resolve_hexp  r?   zCudaFp16Template.resolve_hexpc                 C   r  r=   )hexp10_devicer   r   r   r   resolve_hexp10  r?   zCudaFp16Template.resolve_hexp10c                 C   r  r=   )hexp2_devicer   r   r   r   resolve_hexp2  r?   zCudaFp16Template.resolve_hexp2c                 C   r  r=   )hfloor_devicer   r   r   r   resolve_hfloor  r?   zCudaFp16Template.resolve_hfloorc                 C   r  r=   )hceil_devicer   r   r   r   resolve_hceil  r?   zCudaFp16Template.resolve_hceilc                 C   r  r=   )hsqrt_devicer   r   r   r   resolve_hsqrt  r?   zCudaFp16Template.resolve_hsqrtc                 C   r  r=   )hrsqrt_devicer   r   r   r   resolve_hrsqrt  r?   zCudaFp16Template.resolve_hrsqrtc                 C   r  r=   )hrcp_devicer   r   r   r   resolve_hrcp  r?   zCudaFp16Template.resolve_hrcpc                 C   r  r=   )hrint_devicer   r   r   r   resolve_hrint  r?   zCudaFp16Template.resolve_hrintc                 C   r  r=   )htrunc_devicer   r   r   r   resolve_htrunc  r?   zCudaFp16Template.resolve_htruncc                 C   r   r=   )r   r   Cuda_heqr   r   r   r   resolve_heq  r   zCudaFp16Template.resolve_heqc                 C   r   r=   )r   r   Cuda_hner   r   r   r   resolve_hne  r   zCudaFp16Template.resolve_hnec                 C   r   r=   )r   r   Cuda_hger   r   r   r   resolve_hge  r   zCudaFp16Template.resolve_hgec                 C   r   r=   )r   r   Cuda_hgtr   r   r   r   resolve_hgt  r   zCudaFp16Template.resolve_hgtc                 C   r   r=   )r   r   Cuda_hler   r   r   r   resolve_hle  r   zCudaFp16Template.resolve_hlec                 C   r   r=   )r   r   Cuda_hltr   r   r   r   resolve_hlt  r   zCudaFp16Template.resolve_hltc                 C   r   r=   )r   r   	Cuda_hmaxr   r   r   r   resolve_hmax  r   zCudaFp16Template.resolve_hmaxc                 C   r   r=   )r   r   	Cuda_hminr   r   r   r   resolve_hmin  r   zCudaFp16Template.resolve_hminN)&r0   r1   r2   r   r   r   rr   r7   r  r  r  r  r  r  r  r  r  r  r  r!  r#  r%  r'  r)  r+  r-  r/  r1  r3  r5  r7  r9  r;  r=  r?  rA  rC  rE  r   r   r   r   r	  [  s@    r	  c                   @   s   e Zd ZeeZdd Zdd Zdd Z	dd Z
d	d
 Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Zd1d2 Zd3d4 Z d5d6 Z!d7d8 Z"d9d: Z#d;S )<CudaModuleTemplatec                 C      t tjS r=   )r   r   r   cgr   r   r   r   
resolve_cg  r   zCudaModuleTemplate.resolve_cgc                 C   r  r=   r   r   r   r   r   resolve_threadIdx  r?   z$CudaModuleTemplate.resolve_threadIdxc                 C   r  r=   r   r   r   r   r   resolve_blockIdx  r?   z#CudaModuleTemplate.resolve_blockIdxc                 C   r  r=   r   r   r   r   r   resolve_blockDim  r?   z#CudaModuleTemplate.resolve_blockDimc                 C   r  r=   r   r   r   r   r   resolve_gridDim  r?   z"CudaModuleTemplate.resolve_gridDimc                 C   r   r=   r   r   r   r   r   resolve_laneid  r   z!CudaModuleTemplate.resolve_laneidc                 C   rG  r=   )r   r   r   r5   r   r   r   r   resolve_shared  r   z!CudaModuleTemplate.resolve_sharedc                 C   r   r=   )r   r   rb   r   r   r   r   resolve_popc  r   zCudaModuleTemplate.resolve_popcc                 C   r   r=   )r   r   rx   r   r   r   r   resolve_brev  r   zCudaModuleTemplate.resolve_brevc                 C   r   r=   )r   r   rz   r   r   r   r   resolve_clz  r   zCudaModuleTemplate.resolve_clzc                 C   r   r=   )r   r   r|   r   r   r   r   resolve_ffs  r   zCudaModuleTemplate.resolve_ffsc                 C   r   r=   )r   r   rm   r   r   r   r   resolve_fma  r   zCudaModuleTemplate.resolve_fmac                 C   r   r=   )r   r   rv   r   r   r   r   resolve_cbrt  r   zCudaModuleTemplate.resolve_cbrtc                 C   r   r=   )r   r   rD   r   r   r   r   resolve_threadfence  r   z&CudaModuleTemplate.resolve_threadfencec                 C   r   r=   )r   r   rI   r   r   r   r   resolve_threadfence_block  r   z,CudaModuleTemplate.resolve_threadfence_blockc                 C   r   r=   )r   r   rK   r   r   r   r   resolve_threadfence_system  r   z-CudaModuleTemplate.resolve_threadfence_systemc                 C   r   r=   )r   r   rM   r   r   r   r   resolve_syncwarp  r   z#CudaModuleTemplate.resolve_syncwarpc                 C   r   r=   )r   r   rP   r   r   r   r   resolve_shfl_sync_intrinsic  r   z.CudaModuleTemplate.resolve_shfl_sync_intrinsicc                 C   r   r=   )r   r   rV   r   r   r   r   resolve_vote_sync_intrinsic  r   z.CudaModuleTemplate.resolve_vote_sync_intrinsicc                 C   r   r=   )r   r   rX   r   r   r   r   resolve_match_any_sync  r   z)CudaModuleTemplate.resolve_match_any_syncc                 C   r   r=   )r   r   rZ   r   r   r   r   resolve_match_all_sync  r   z)CudaModuleTemplate.resolve_match_all_syncc                 C   r   r=   )r   r   r\   r   r   r   r   resolve_activemask  r   z%CudaModuleTemplate.resolve_activemaskc                 C   r   r=   )r   r   r_   r   r   r   r   resolve_lanemask_lt   r   z&CudaModuleTemplate.resolve_lanemask_ltc                 C   r   r=   )r   r   r~   r   r   r   r   resolve_selp  r   zCudaModuleTemplate.resolve_selpc                 C   r   r=   )r   r   r   r   r   r   r   resolve_nanosleep  r   z$CudaModuleTemplate.resolve_nanosleepc                 C   rG  r=   )r   r   r   r   r   r   r   r   resolve_atomic	  r   z!CudaModuleTemplate.resolve_atomicc                 C   rG  r=   )r   r   r   rr   r   r   r   r   resolve_fp16  r   zCudaModuleTemplate.resolve_fp16c                 C   rG  r=   )r   r   r   r@   r   r   r   r   resolve_const  r   z CudaModuleTemplate.resolve_constc                 C   rG  r=   )r   r   r   r:   r   r   r   r   resolve_local  r   z CudaModuleTemplate.resolve_localN)$r0   r1   r2   r   r   r   r7   rI  rJ  rK  rL  rM  rN  rO  rP  rQ  rR  rS  rT  rU  rV  rW  rX  rY  rZ  r[  r\  r]  r^  r_  r`  ra  rb  rc  rd  re  r   r   r   r   rF    s>    
rF  )loglog2log10)operator
numba.corer   numba.core.typing.npydeclr   r   r   r   r   r   r	   r
   numba.core.typing.templatesr   r   r   r   r   r   numba.cuda.typesr   numba.core.typeconvr   numbar   numba.cuda.compilerr   registryr   register_attrr   r   r4   r9   r<   rD   rI   rK   rM   rP   rV   rX   rZ   r\   r_   rb   rm   rq   rv   rx   rz   r|   r~   r   r   r   floatr   r   r   r   r   rr   haddr
  addCuda_addiadd	Cuda_iaddhsubr  subCuda_subisub	Cuda_isubhmulr  mulCuda_mulimul	Cuda_imulhmaxrB  hminrD  hnegr  negCuda_neghabsr  absCuda_absheqr6  eqhner8  nehger:  gehgtr<  gthler>  lehltr@  lttruedivitruedivr   r   r  r  r  r  r   r"  r$  r&  r,  r.  r(  r*  r0  r2  r4  r  r   rp   ro   rg   r^   rh   rk   all_numba_typesr   unsigned_int_numba_typesr   r   r   maxr   minr  nanmaxr  nanminr  and_r   or_r   xorr   incr   decr   exchr   r   r   r   r   r   r   r   r   r	  rF  r   funcr   r   r   r   <module>   sJ   ( 	

			














.^[

