o
    is                     @   s  d dl Z d dlZd dlZd dlZd dlmZmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZmZmZmZ dd Zd	d
 Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Z d#d$ Z!d%d& Z"d'd( Z#d)d* Z$d+d, Z%d-d. Z&d/d0 Z'd1d2 Z(d3d4 Z)d5d6 Z*d7d8 Z+d9d: Z,d;d< Z-d=d> Z.d?d@ Z/dAdB Z0ej1dCdDdEdF Z2ej1dCdDdGdH Z3dIdJ Z4dKdL Z5dMdN Z6dOdP Z7dQdR Z8dSdT Z9dUdV Z:dWdX Z;dYdZ Z<d[d\ Z=d]d^ Z>d_d` Z?dadb Z@dcdd ZAdedf ZBdgdh ZCdidj ZDdkdl ZEdmdn ZFdodp ZGdqdr ZHdsdt ZIdudv ZJdwdx ZKdydz ZLd{d| ZMd}d~ ZNdd ZOdd ZPdd ZQdd ZRdd ZSdd ZTdd ZUG dd deZVeWdkr]eX  dS dS )    N)cudaint64)compile_ptx)TypingError)f2)unittestCUDATestCaseskip_on_cudasimskip_unless_cc_53c                 C   s   t jj}|| d< d S Nr   r   	threadIdxxaryi r   d/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/numba/cuda/tests/cudapy/test_intrinsics.pysimple_threadidx      r   c                 C   s   t jj}|| |< d S Nr   r   r   r   r   fill_threadidx   r   r   c                 C   s>   t jj}t jj}t jj}|d |d  |d  | |||f< d S N   )r   r   r   yz)r   r   jkr   r   r   fill3d_threadidx   s   &r   c                 C   s   t d}|| |< d S r   r   gridr   r   r   r   simple_grid1d   s   
r!   c                 C   s"   t d\}}|| | ||f< d S N   r   )r   r   r   r   r   r   simple_grid2d$   s   r$   c                 C   s,   t d}t d}|dkr|| d< d S d S Nr   r   r   r    gridsize)r   r   r   r   r   r   simple_gridsize1d)   s
   

r(   c                 C   sH   t d\}}t d\}}|dkr |dkr"|| d< || d< d S d S d S )Nr#   r   r   r&   )r   r   r   r   r   r   r   r   simple_gridsize2d0   s   r)   c           	      C   sp   t d\}}t jjt jj }t jjt jj }| j\}}t|||D ]}t|||D ]
}|| | ||f< q*q"d S r"   )r   r    gridDimr   blockDimr   shaperange)	cstartXstartYgridXgridYheightwidthr   r   r   r   r   intrinsic_forloop_step8   s   
r5   c                 C      t || d< d S r   )r   popcr   r.   r   r   r   simple_popcC      r9   c                 C   s   t |||| d< d S r   )r   fmar   abr.   r   r   r   
simple_fmaG      r?   c                 C      t j|d |d | d< d S r   r   fp16haddr   r=   r>   r   r   r   simple_haddK      rF   c                 C      t j||| d< d S r   rB   rE   r   r   r   simple_hadd_scalarO   r@   rI   c                 C   s$   t j|d |d |d | d< d S r   r   rC   hfmar<   r   r   r   simple_hfmaS   s   $rL   c                 C   s   t j|||| d< d S r   rJ   r<   r   r   r   simple_hfma_scalarW      rM   c                 C   rA   r   r   rC   hsubrE   r   r   r   simple_hsub[   rG   rQ   c                 C   rH   r   rO   rE   r   r   r   simple_hsub_scalar_   r@   rR   c                 C   rA   r   r   rC   hmulrE   r   r   r   simple_hmulc   rG   rU   c                 C   rH   r   rS   rE   r   r   r   simple_hmul_scalarg   r@   rV   c                 C   rH   r   )r   rC   hdivrE   r   r   r   simple_hdiv_scalark   r@   rX   c                 C   s>   t d}|| jk r|| }|| }t j||| |< d S d S r   )r   r    sizerC   rW   )r   array_aarray_br   r=   r>   r   r   r   simple_hdiv_kernelo   s   

r\   c                 C      t j|d | d< d S r   r   rC   hnegr   r=   r   r   r   simple_hnegw   rN   ra   c                 C      t j|| d< d S r   r^   r`   r   r   r   simple_hneg_scalar{      rc   c                 C   r]   r   r   rC   habsr`   r   r   r   simple_habs   rN   rg   c                 C   rb   r   re   r`   r   r   r   simple_habs_scalar   rd   rh   c                 C   rH   r   )r   rC   heqrE   r   r   r   simple_heq_scalar   r@   rj   c                 C   rH   r   )r   rC   hnerE   r   r   r   simple_hne_scalar   r@   rl   c                 C   rH   r   )r   rC   hgerE   r   r   r   simple_hge_scalar   r@   rn   c                 C   rH   r   )r   rC   hgtrE   r   r   r   simple_hgt_scalar   r@   rp   c                 C   rH   r   )r   rC   hlerE   r   r   r   simple_hle_scalar   r@   rr   c                 C   rH   r   r   rC   hltrE   r   r   r   simple_hlt_scalar   r@   ru   T)devicec                 C      t j| |S r   rs   r   r   r   r   r   
hlt_func_1      ry   c                 C   rw   r   rs   rx   r   r   r   
hlt_func_2   rz   r{   c                 C   s   t ||o	t||| d< d S r   )ry   r{   rr=   r>   r.   r   r   r   test_multiple_hcmp_1   s   r~   c                 C   s    t ||otj||| d< d S r   )ry   r   rC   rt   r|   r   r   r   test_multiple_hcmp_2       r   c                 C   s    t ||otj||| d< d S r   )ry   r   rC   rm   r|   r   r   r   test_multiple_hcmp_3   r   r   c                 C   s$   t j||ot j||| d< d S r   rs   r|   r   r   r   test_multiple_hcmp_4      $r   c                 C   s$   t j||ot j||| d< d S r   )r   rC   rt   rm   r|   r   r   r   test_multiple_hcmp_5   r   r   c                 C   rH   r   )r   rC   hmaxrE   r   r   r   simple_hmax_scalar   r@   r   c                 C   rH   r   )r   rC   hminrE   r   r   r   simple_hmin_scalar   r@   r   c                 C   2   t d}|t| k rt j|| | |< d S d S r   )r   r    lenrC   hsinr}   r   r   r   r   r   simple_hsin      
r   c                 C   r   r   )r   r    r   rC   hcosr   r   r   r   simple_hcos   r   r   c                 C   r   r   )r   r    r   rC   hlogr   r   r   r   simple_hlog   r   r   c                 C   r   r   )r   r    r   rC   hlog2r   r   r   r   simple_hlog2   r   r   c                 C   r   r   )r   r    r   rC   hlog10r   r   r   r   simple_hlog10   r   r   c                 C   r   r   )r   r    r   rC   hexpr   r   r   r   simple_hexp   r   r   c                 C   r   r   )r   r    r   rC   hexp2r   r   r   r   simple_hexp2   r   r   c                 C   r   r   )r   r    r   rC   hsqrtr   r   r   r   simple_hsqrt   r   r   c                 C   r   r   )r   r    r   rC   hrsqrtr   r   r   r   simple_hrsqrt     
r   c                 C   s   | d S )Ng      r   )r   dtyper   r   r   numpy_hrsqrt
  s   r   c                 C   r   r   )r   r    r   rC   hceilr   r   r   r   simple_hceil  r   r   c                 C   r   r   )r   r    r   rC   hfloorr   r   r   r   simple_hfloor  r   r   c                 C   r   r   )r   r    r   rC   hrcpr   r   r   r   simple_hrcp  r   r   c                 C   r   r   )r   r    r   rC   htruncr   r   r   r   simple_htrunc#  r   r   c                 C   r   r   )r   r    r   rC   hrintr   r   r   r   simple_hrint*  r   r   c                 C   r6   r   )r   cbrtr`   r   r   r   simple_cbrt1  r:   r   c                 C   r6   r   )r   brevr8   r   r   r   simple_brev5  r:   r   c                 C   r6   r   )r   clzr8   r   r   r   
simple_clz9  r:   r   c                 C   r6   r   )r   ffsr8   r   r   r   
simple_ffs=  r:   r   c                 C   s   t || d< d S r   roundr8   r   r   r   simple_roundA  s   r   c                 C   s   t ||| d< d S r   r   )r   r.   ndigitsr   r   r   simple_round_toE  r:   r   c                 C   sJ   t d}| | dkr|d dkr|| | |< d S d| |< d S d| |< d S )Nr      r#   r         r   )r=   r>   r.   r   r   r   r   branching_with_ifsI  s   
r   c                 C   sB   t d}t |d dk|| d}t | | dk|d| |< d S )Nr   r#   r   r   r   r   )r   r    selp)r=   r>   r.   r   innerr   r   r   branching_with_selpsU  s   
r   c                 C   s   t d}t j| |< d S r   )r   r    laneidr   r   r   r   simple_laneid\  s   
r   c                 C   s   t j| d< d S r   )r   warpsize)r   r   r   r   simple_warpsizea     r   c                 C      t |  d S r   r   r   r   r   r   nonliteral_gride  r   r   c                 C   r   r   )r   r'   r   r   r   r   nonliteral_gridsizei  r   r   c                       s  e Zd Z fddZdd Zdd Zdd Zed	d
d Zed	dd Z	dd Z
dd Zdd Zeddd Zeddd Zdd Zdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Zed*d+ Zed,d- Zed.d/d0 Zed1d2 Zed3d4 Zed.d5d6 Zed7d8 Zed9d: Zed.d;d< Z ed=d> Z!ed?d@ Z"ed.dAdB Z#edCdD Z$edEdF Z%edGdH Z&edIdJ Z'ed.dKdL Z(edMdN Z)edOdP Z*ed.dQdR Z+edSdT Z,edUdV Z-edWdX Z.edYdZ Z/ed[d\ Z0ed]d^ Z1d_d` Z2dadb Z3dcdd Z4ededfdg Z5dhdi Z6djdk Z7dldm Z8dndo Z9ededpdq Z:drds Z;dtdu Z<dvdw Z=dxdy Z>ededzd{ Z?d|d} Z@d~d ZAdd ZBdd ZCdd ZDeddd ZEdd ZFdd ZGeddd ZHdd ZI  ZJS )TestCudaIntrinsicc                    s   t    tjd d S r   )supersetUpnprandomseedself	__class__r   r   r   n  s   
zTestCudaIntrinsic.setUpc                 C   s@   t dt}tjdtjd}|d | | |d dk d S )Nvoid(int32[:])r   r   r   r   r   )r   jitr   r   onesint32
assertTruer   compiledr   r   r   r   test_simple_threadidxr     z'TestCudaIntrinsic.test_simple_threadidxc                 C   sZ   t dt}d}tj|tjd}tj|tjd}|d|f | | t||k d S )Nr   
   r   r   )	r   r   r   r   r   r   aranger   all)r   r   Nr   expr   r   r   test_fill_threadidxx  s   z%TestCudaIntrinsic.test_fill_threadidxc                    sN   d\  fdd} fdd}| }| }|  t||k d S )N)r         c                     s>   t dt} tj ftjd}| d ff | |S )Nzvoid(int32[:,:,::1])r   r   )r   r   r   r   zerosr   r   r   XYZr   r   c_contigous  s   z<TestCudaIntrinsic.test_fill3d_threadidx.<locals>.c_contigousc                     sD   t dt} ttj ftjd}| d ff | |S )Nzvoid(int32[::1,:,:])r   r   )r   r   r   r   asfortranarrayr   r   r   r   r   r   f_contigous  s   z<TestCudaIntrinsic.test_fill3d_threadidx.<locals>.f_contigous)r   r   r   )r   r   r   c_resf_resr   r   r   test_fill3d_threadidx  s   
z'TestCudaIntrinsic.test_fill3d_threadidxzCudasim does not check typesc                 C   @   |  td tdt W d    d S 1 sw   Y  d S NRequireLiteralValuezvoid(int32))assertRaisesRegexr   r   r   r   r   r   r   r   test_nonliteral_grid_error     "z,TestCudaIntrinsic.test_nonliteral_grid_errorc                 C   r   r   )r   r   r   r   r   r   r   r   r   test_nonliteral_gridsize_error  r   z0TestCudaIntrinsic.test_nonliteral_gridsize_errorc                 C   s\   t dt}d\}}|| }tj|tjd}|||f | | t|t|k d S )Nvoid(int32[::1])r      r   )	r   r   r!   r   emptyr   r   r   r   )r   r   ntidnctaidnelemr   r   r   r   test_simple_grid1d  s   z$TestCudaIntrinsic.test_simple_grid1dc           	      C   s   t dt}d}d}|d |d  |d |d  f}tj|tjd}| }|||f | t|jd D ]}t|jd D ]
}|| |||f< q?q6| 	t
||k d S Nzvoid(int32[:,::1])r   r   r   r   r   r   r   )r   r   r$   r   r   r   copyr-   r,   r   r   )	r   r   r   r   r,   r   r   r   r   r   r   r   test_simple_grid2d  s    z$TestCudaIntrinsic.test_simple_grid2dc                 C   sN   t dt}d\}}tjdtjd}|||f | | |d ||  d S )Nr   r   r   r   r   )r   r   r(   r   r   r   assertEqualr   r   r   r   r   r   r   r   test_simple_gridsize1d  s
   z(TestCudaIntrinsic.test_simple_gridsize1dzRequires too many threadsc                 C   s`   t jdd }tjdtjd}tjdtjd}|d || | |d d | |d d d S )Nc                 S   sd   t d}t jjt jj t jj }t d}t jjt jj }||kr&d| d< ||kr0d|d< d S d S r%   )r   r    blockIdxr   r+   r   r'   r*   )
grid_errorgridsize_errori1i2gs1gs2r   r   r   f  s   

z,TestCudaIntrinsic.test_issue_9229.<locals>.fr   r   )i Q   r   )r   r   r   r   uint64r  )r   r  r
  r  r   r   r   test_issue_9229  s   

z!TestCudaIntrinsic.test_issue_9229zTests PTX emissionc           
      C   s  t d d  t t d d  f}t|t}t|t}d}d}tjddtj d}| }d|d d< tj|tj d}||df ||| |	|}	| 
d	ttd
|	 tjj||dd tj|tj d}||df ||| |	|}	| 
dttd
|	 tjj||dd d S )N    r      )r,   
fill_valuer   r   r   r   r   r#   z	\s+bra\s+	branching)err_msgr   r   )r   r   r   r   r   r   fullr  r   inspect_asmr  r   refindalltestingassert_array_equal)
r   sigcu_branching_with_ifscu_branching_with_selpsnr>   r.   expectedr=   ptxr   r   r   	test_selp  s$   

zTestCudaIntrinsic.test_selpc                 C   sr   t dt}d}d}tjdtjd}|||f | | |d |d |d   | |d |d |d   d S )Nr   r  r  r#   r   r   r   )r   r   r)   r   r   r   r  r  r   r   r   test_simple_gridsize2d  s    z(TestCudaIntrinsic.test_simple_gridsize2dc              	   C   s   t dt}d}d}|d |d  |d |d  f}tj|tjd}|||f | |\}}|j\}}	tt|d t|d D ]5\}
}||
 || }}t||	|D ]!}t|||D ]}| 	|||f || k|||f || f q\qTqAd S r  )
r   r   r5   r   r   r   r,   zipr-   r   )r   r   r   r   r,   r   r1   r2   r3   r4   r   r   r/   r0   r   r   r   r   r   test_intrinsic_forloop_step  s     
".z-TestCudaIntrinsic.test_intrinsic_forloop_stepc                 C   sF   t jdd }tjdtjdddd}|d | tj|d d S )Nc                 S   s:   t d\}}}t d\}}}|| | | |||f< d S Nr   r&   )outr   r   r   r=   r>   r.   r   r   r   foo	  s   z*TestCudaIntrinsic.test_3dgrid.<locals>.fooi  r   	   )r   r   r   r-  )r   r   r   r   r   reshaper  assert_equal)r   r+  arrr   r   r   test_3dgrid  s
   
zTestCudaIntrinsic.test_3dgridc                 C   sZ   t jdd }d\}}}tj|| | tjd|||}|d | | t| d S )Nc           	      S   s   t d\}}}t d\}}}|t jjt jjt jj  ko9|t jjt jjt jj  ko9|t jjt jjt jj  k}|t jjt j	j koW|t jjt j	j koW|t jjt j	j k}|o[|| |||f< d S r)  )
r   r    r'   r   r   r	  r+   r   r   r*   )	r*  r   r   r   r=   r>   r.   grid_is_rightgridsize_is_rightr   r   r   r+    s   z,TestCudaIntrinsic.test_3dgrid_2.<locals>.foo)   r      r   ))r   r   r#   )r   r#   r   )r   r   r   r   bool_r.  r   r   )r   r+  r   r   r   r0  r   r   r   test_3dgrid_2  s   

"zTestCudaIntrinsic.test_3dgrid_2c                 C   @   t dt}tjdtjd}|d |d | |d d d S )Nvoid(int32[:], uint32)r   r   r      r   r   r   r   r9   r   r   r   r  r   r   r   r   test_popc_u4)     zTestCudaIntrinsic.test_popc_u4c                 C   r8  )Nzvoid(int32[:], uint64)r   r   r   l        @ r   r   r;  r   r   r   r   test_popc_u8/  r=  zTestCudaIntrinsic.test_popc_u8c                 C   F   t dt}tjdtjd}|d |ddd tj|d d	 d S )
Nzvoid(f4[:], f4, f4, f4)r   r   r          @      @      @r   r   )r   r   r?   r   r   float32r  assert_allcloser   r   r   r   test_fma_f45     zTestCudaIntrinsic.test_fma_f4c                 C   r?  )
Nzvoid(f8[:], f8, f8, f8)r   r   r   r@  rA  rB  r   r   )r   r   r?   r   r   float64r  rD  r   r   r   r   test_fma_f8;  rF  zTestCudaIntrinsic.test_fma_f8c                 C   sl   t dt}tjdtjd}tjdgtjd}tjdgtjd}|d ||| tj|d ||  d S Nvoid(f2[:], f2[:], f2[:])r   r   rA  rB  r   r   )	r   r   rF   r   r   float16arrayr  rD  r   r   r   arg1arg2r   r   r   	test_haddA     zTestCudaIntrinsic.test_haddc                 C   s`   t dt}tjdtjd}td}td}|d ||| || }tj|d | d S )Nvoid(f2[:], f2, f2)r   r   JM!	@rA  r   r   )r   r   rI   r   r   rK  r  rD  r   r   r   rN  rO  refr   r   r   test_hadd_scalarJ     

z"TestCudaIntrinsic.test_hadd_scalarz(Compilation unsupported in the simulatorc                 C   4   t d d  t t f}tt|dd\}}| d| d S )Nr   r   cczadd.f16)r   r   rI   assertInr   argsr$  _r   r   r   test_hadd_ptxT     zTestCudaIntrinsic.test_hadd_ptxc                 C   s   t dt}tjdtjd}tjdgtjd}tjdgtjd}tjdgtjd}|d |||| tj|d || |  d S )	Nz void(f2[:], f2[:], f2[:], f2[:])r   r   r@  rA  rB  r   r   )	r   r   rL   r   r   rK  rL  r  rD  )r   r   r   rN  rO  arg3r   r   r   	test_hfmaZ  s   zTestCudaIntrinsic.test_hfmac                 C   sp   t dt}tjdtjd}td}td}td}|d |||| || | }tj|d | d S )	Nzvoid(f2[:], f2, f2, f2)r   r   r@  rA  rB  r   r   )r   r   rM   r   r   rK  r  rD  )r   r   r   rN  rO  rb  rU  r   r   r   test_hfma_scalard  s   


z"TestCudaIntrinsic.test_hfma_scalarc                 C   s6   t d d  t t t f}tt|dd\}}| d| d S )NrY  rZ  z
fma.rn.f16)r   r   rM   r\  r]  r   r   r   test_hfma_ptxo  s   zTestCudaIntrinsic.test_hfma_ptxc                 C   sl   t dt}tjdtjd}tjdgtjd}tjdgtjd}|d ||| tj|d ||  d S rI  )	r   r   rQ   r   r   rK  rL  r  rD  rM  r   r   r   	test_hsubu  rQ  zTestCudaIntrinsic.test_hsubc                 C   s`   t dt}tjdtjd}td}td}|d ||| || }tj|d | d S NrR  r   r   rS  gQ?r   r   )r   r   rR   r   r   rK  r  rD  rT  r   r   r   test_hsub_scalar~  rW  z"TestCudaIntrinsic.test_hsub_scalarc                 C   rX  )NrY  rZ  zsub.f16)r   r   rR   r\  r]  r   r   r   test_hsub_ptx  ra  zTestCudaIntrinsic.test_hsub_ptxc                 C   sj   t  t}tjdtjd}tjdgtjd}tjdgtjd}|d ||| tj|d ||  d S )Nr   r   rA  rB  r   r   )	r   r   rU   r   r   rK  rL  r  rD  rM  r   r   r   	test_hmul  s   zTestCudaIntrinsic.test_hmulc                 C   s`   t dt}tjdtjd}td}td}|d ||| || }tj|d | d S rg  )r   r   rV   r   r   rK  r  rD  rT  r   r   r   test_hmul_scalar  rW  z"TestCudaIntrinsic.test_hmul_scalarc                 C   rX  )NrY  rZ  zmul.f16)r   r   rV   r\  r]  r   r   r   test_hmul_ptx  ra  zTestCudaIntrinsic.test_hmul_ptxc                 C   s`   t dt}tjdtjd}td}td}|d ||| || }tj|d | d S rg  )r   r   rX   r   r   rK  r  rD  rT  r   r   r   test_hdiv_scalar  s   

z"TestCudaIntrinsic.test_hdiv_scalarc                 C   s   t dt}tjjddddtj}tjjddddtj}tj|tjd}|	|j
||| || }tj|| d S )NrJ  i    i  rY   r   )r   r   r\   r   r   randintastyperK  
zeros_likeforallrY   r  rD  )r   r   arry1arry2r   rU  r   r   r   	test_hdiv  s   zTestCudaIntrinsic.test_hdivc                 C   sV   t dt}tjdtjd}tjdgtjd}|d || tj|d |  d S )Nvoid(f2[:], f2[:])r   r   rA  r   r   )	r   r   ra   r   r   rK  rL  r  rD  r   r   r   rN  r   r   r   	test_hneg  s
   zTestCudaIntrinsic.test_hnegc                 C   sR   t dt}tjdtjd}td}|d || | }tj|d | d S )Nvoid(f2[:], f2)r   r   rS  r   r   )r   r   rc   r   r   rK  r  rD  r   r   r   rN  rU  r   r   r   test_hneg_scalar  s   
z"TestCudaIntrinsic.test_hneg_scalarc                 C   2   t d d  t f}tt|dd\}}| d| d S )NrY  rZ  zneg.f16)r   r   rc   r\  r]  r   r   r   test_hneg_ptx     zTestCudaIntrinsic.test_hneg_ptxc                 C   sV   t  t}tjdtjd}tjdgtjd}|d || tj|d t	| d S )Nr   r         r   r   )
r   r   rg   r   r   rK  rL  r  rD  absrx  r   r   r   	test_habs  s
   zTestCudaIntrinsic.test_habsc                 C   sT   t dt}tjdtjd}td}|d || t|}tj|d | d S )Nrz  r   r   gJM!	r   r   )	r   r   rh   r   r   rK  r  r  rD  r{  r   r   r   test_habs_scalar  s   
z"TestCudaIntrinsic.test_habs_scalarc                 C   r}  )NrY  rZ  zabs.f16)r   r   rh   r\  r]  r   r   r   test_habs_ptx  r  zTestCudaIntrinsic.test_habs_ptxc                 C   s  t ttttttttt	t
tf}ttf}tjtjtjtjtjtjtjtjtjtjtjtf}tjtjf}d}tjd tjjdd|d tj!}t"|}t#||D ]8\}}	| j$|	d& t%&d|}|d|f || |	|tj!d}
tj'(||
 W d    n1 sw   Y  qPtjjdd|d tj!}t#||D ]8\}}	| j$|	d& t%&d|}|d|f || |	|tj!d}
tj'(||
 W d    n1 sw   Y  qd S )	Nr  r   rn  ro  fnrw  r   r   ))r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   sincosloglog2log10sqrtceilfloor
reciprocaltruncrintr   r   exp2r   r   rp  rq  rK  rr  r'  subTestr   r   r  rD  )r   kernelsexp_kernelsexpected_functionsexpected_exp_functionsr   r   r}   kernelr  r#  x2r   r   r   test_fp16_intrinsics_common  sH   
z-TestCudaIntrinsic.test_fp16_intrinsics_commonc                 C   sf   t  dd }d}tjd tj|tj}t|}|d|f || tj	
|d|  d S )Nc                 S   r   r   )r   r    r   rC   hexp10r   r   r   r   hexp10_vectors  r   z5TestCudaIntrinsic.test_hexp10.<locals>.hexp10_vectorsr  r   r   )r   r   r   r   r   randrq  rK  rr  r  rD  )r   r  r   r   r}   r   r   r   test_hexp10  s   

zTestCudaIntrinsic.test_hexp10c              	   C   s:  t tttttf}tjtjtj	tj
tjtjf}t||D ]\}}| j|dm td|}tjdtjd}tjdtjd}td}td}	td}
|d ||	|	 ||	|	}| ||d	  |d ||	|
 ||	|
}| ||d	  |d ||	| ||	|}| ||d	  W d    n1 sw   Y  qd S )
N)opzvoid(b1[:], f2, f2)r   r   r#   r   r   r   r   )rj   rl   rn   rp   rr   ru   operatoreqnegegtleltr'  r  r   r   r   r   r6  rK  r  )r   fnsopsr  r  r  r#  gotrO  rb  arg4r   r   r   test_fp16_comparison!  s4   





z&TestCudaIntrinsic.test_fp16_comparisonc              	   C   s   t ttttf}|D ]F}| j|d6 td|}tj	dtj
d}td}td}td}|d |||| | |d	  W d    n1 sJw   Y  q	d S )
Nr  zvoid(b1[:], f2, f2, f2)r   r   r@  rA  rB  r   r   )r~   r   r   r   r   r  r   r   r   r   r6  rK  r   )r   	functionsr  r   r   rN  rO  rb  r   r   r   !test_multiple_float16_comparisonsA  s$   


z3TestCudaIntrinsic.test_multiple_float16_comparisonsc                 C   s   t dt}tjdtjd}td}td}|d ||| tj|d | td}|d ||| tj|d | d S 	NrR  r   r   rA  rB  r   r   g      @)r   r   r   r   r   rK  r  rD  rM  r   r   r   	test_hmaxR     


zTestCudaIntrinsic.test_hmaxc                 C   s   t dt}tjdtjd}td}td}|d ||| tj|d | td}|d ||| tj|d | d S r  )r   r   r   r   r   rK  r  rD  rM  r   r   r   	test_hmin^  r  zTestCudaIntrinsic.test_hminc                 C   J   t dt}tjdtjd}d}|d || tj|d |d  d S )Nzvoid(float32[:], float32)r   r   r@  r   r   UUUUUU?)r   r   r   r   r   rC  r  rD  r   r   r   cbrt_argr   r   r   test_cbrt_f32j  
   zTestCudaIntrinsic.test_cbrt_f32c                 C   r  )Nzvoid(float64[:], float64)r   r   g      @r   r   r  )r   r   r   r   r   rG  r  rD  r  r   r   r   test_cbrt_f64q  r  zTestCudaIntrinsic.test_cbrt_f64c                 C   r8  )Nzvoid(uint32[:], uint32)r   r   r   i0  r   i  )r   r   r   r   r   uint32r  r   r   r   r   test_brev_u4x  r=  zTestCudaIntrinsic.test_brev_u4z.only get given a Python "int", assumes 32 bitsc                 C   r8  )Nzvoid(uint64[:], uint64)r   r   r   l   0  C r   l       `x)r   r   r   r   r   r  r  r   r   r   r   test_brev_u8~     zTestCudaIntrinsic.test_brev_u8c                 C   r8  )Nvoid(int32[:], int32)r   r   r      r      r   r   r   r   r   r   r  r   r   r   r   test_clz_i4  r=  zTestCudaIntrinsic.test_clz_i4c                 C   s@   t dt}tjdtjd}|d |d | |d d dS )	a  
        Although the CUDA Math API
        (http://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH__INTRINSIC__INT.html)
        only says int32 & int64 arguments are supported in C code, the LLVM
        IR input supports i8, i16, i32 & i64 (LLVM doesn't have a concept of
        unsigned integers, just unsigned operations on integers).
        http://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#bit-manipulations-intrinics
        r9  r   r   r   r  r   r  Nr  r   r   r   r   test_clz_u4  s   	zTestCudaIntrinsic.test_clz_u4c                 C   s@   t dt}tjdtjd}|d |d | |d d d S Nr  r   r   r   l    r   r  r   r   r   r   test_clz_i4_1s  r=  z TestCudaIntrinsic.test_clz_i4_1sc                 C   sB   t dt}tjdtjd}|d |d | |d dd d S )Nr  r   r   r   r   r  CUDA semanticsr  r   r   r   r   test_clz_i4_0s  s   z TestCudaIntrinsic.test_clz_i4_0sc                 C   r8  )Nvoid(int32[:], int64)r   r   r      r   /   r  r   r   r   r   test_clz_i8  r  zTestCudaIntrinsic.test_clz_i8c                 C   ^   t dt}tjdtjd}|d |d | |d d |d |d | |d d	 d S )
Nr  r   r   r   r  r              r  r   r   r   r   r   r   r  r   r   r   r   test_ffs_i4     zTestCudaIntrinsic.test_ffs_i4c                 C   r  )
Nr9  r   r   r   r  r   r  r  r  r  r   r   r   r   test_ffs_u4  r  zTestCudaIntrinsic.test_ffs_u4c                 C   s@   t dt}tjdtjd}|d |d | |d d d S r  r  r   r   r   r   test_ffs_i4_1s  r=  z TestCudaIntrinsic.test_ffs_i4_1sc                 C   s@   t dt}tjdtjd}|d |d | |d d d S )Nr  r   r   r   r   r  r   r   r   r   test_ffs_i4_0s  r=  z TestCudaIntrinsic.test_ffs_i4_0sc                 C   r  )
Nr  r   r   r   r  r   r  l        !   r  r   r   r   r   test_ffs_i8  s   zTestCudaIntrinsic.test_ffs_i8c                 C   sj   t dt}d}tj|d tjd}ttjdtjd|}|d|d f | | t	||k d S )Nr   r#   r  r   r   )
r   r   r   r   r   r   tiler   r   r   )r   r   countr   r   r   r   r   test_simple_laneid  s   z$TestCudaIntrinsic.test_simple_laneidc                 C   s@   t dt}tjdtjd}|d | | |d dd d S )Nr   r   r   r   r   r  r  )r   r   r   r   r   r   r  r   r   r   r   test_simple_warpsize  r   z&TestCudaIntrinsic.test_simple_warpsizec                 C   N   t dt}tjdtjd}dD ]}|d || | |d t| qd S )Nzvoid(int64[:], float32)r   r   r  g      g      g      g      ?g      @g      @g      @r   r   r   r   r   r   r   r   r  r   r   r   r   r   r   r   r   test_round_f4     zTestCudaIntrinsic.test_round_f4c                 C   r  )Nzvoid(int64[:], float64)r   r   r  r   r   r  r  r   r   r   test_round_f8  r  zTestCudaIntrinsic.test_round_f8c              	   C   s   t dt}tjdtjd}tjd tjdtj}t	|t
tjtj tjgf d}t||D ]0\}}| j||d |d ||| | j|d	 t||d
d W d    n1 scw   Y  q8d S )N void(float32[:], float32, int32)r   r   {   r  )r   r   r#   r   r   r   r   valr   r   r   singleprec)r   r   r   r   r   rC  r   r   rq  concatenaterL  infnan	itertoolsproductr  assertPreciseEqualr   r   r   r   valsdigitsr  r   r   r   r   test_round_to_f4  s   "	z"TestCudaIntrinsic.test_round_to_f4z$Overflow behavior differs on CPythonc                 C   T   t dt}tjdtjd}ttjj}d}|d ||| | |d | d S )Nr  r   r   i,  r   r   )	r   r   r   r   r   rC  finfomaxr  r   r   r   r  r   r   r   r   test_round_to_f4_overflow	  s   z+TestCudaIntrinsic.test_round_to_f4_overflowc                 C   T   t dt}tjdtjd}d}d}|d ||| | j|d t||dd	 d S )
Nr  r   r   gQ?r   r   r   r  r  )r   r   r   r   r   rC  r  r   r  r   r   r   test_round_to_f4_halfway  s   z*TestCudaIntrinsic.test_round_to_f4_halfwayc              	   C   s0  t dt}tjdtjd}tjd tjd}t|t	tj
tj
 tjgf d}t||D ]0\}}| j||d |d ||| | j|d	 t||d
d W d    n1 s_w   Y  q4d}d}| j||d |d ||| | j|d	 t||dd W d    d S 1 sw   Y  d S )N void(float64[:], float64, int32)r   r   r  r  )r  r  r  r  r  r   r   r#   r   r   r   r  r   r   exactr  g`8p=<   double)r   r   r   r   r   rG  r   r   r  rL  r  r  r  r  r  r  r   r  r   r   r   test_round_to_f8!  s,   ""z"TestCudaIntrinsic.test_round_to_f8c                 C   r  )Nr  r   r   r4  r   r   )	r   r   r   r   r   rG  r  r  r  r  r   r   r   test_round_to_f8_overflow8  s   z+TestCudaIntrinsic.test_round_to_f8_overflowc                 C   r  )
Nr  r   r   g\(\?r   r   r   r  r  )r   r   r   r   r   rG  r  r   r  r   r   r   test_round_to_f8_halfwayE  s   z*TestCudaIntrinsic.test_round_to_f8_halfway)K__name__
__module____qualname__r   r   r   r   r	   r   r   r   r  r  r  r%  r&  r(  r1  r7  r<  r>  rE  rH  r
   rP  rV  r`  rc  rd  re  rf  rh  ri  rj  rk  rl  rm  rv  ry  r|  r~  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  __classcell__r   r   r   r   r   m  s    






	

	




	


	











"










r   __main__)Yr  numpyr   r  r  numbar   r   
numba.cudar   numba.core.errorsr   numba.core.typesr   numba.cuda.testingr   r   r	   r
   r   r   r   r!   r$   r(   r)   r5   r9   r?   rF   rI   rL   rM   rQ   rR   rU   rV   rX   r\   ra   rc   rg   rh   rj   rl   rn   rp   rr   ru   r   ry   r{   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  mainr   r   r   r   <module>   s    



     
i