o
    ۾iR#                     @   s   d dl Zd dlmZmZmZmZmZ d dlm	Z	m
Z
mZ d dlmZ dd Zdd Zd	d
 Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zed!G d"d# d#e
Zed$krpe	  dS dS )%    N)cudaint32int64float32float64)unittestCUDATestCaseskip_on_cudasim)configc                 C   s4   t d}|dkrd| d< t d | d | |< d S )N   r   *       )r   gridsyncwarp)aryi r   Y/home/ubuntu/.local/lib/python3.10/site-packages/numba/cuda/tests/cudapy/test_warp_ops.pyuseful_syncwarp   s
   

r   c                 C   $   t d}t d||}|| |< d S Nr   r   r   r   	shfl_sync)r   idxr   valr   r   r   use_shfl_sync_idx      
r   c                 C   r   r   )r   r   shfl_up_syncr   deltar   r   r   r   r   use_shfl_sync_up   r   r    c                 C   r   r   )r   r   shfl_down_syncr   r   r   r   use_shfl_sync_down   r   r"   c                 C   r   r   )r   r   shfl_xor_sync)r   xorr   r   r   r   r   use_shfl_sync_xor!   r   r%   c                 C   s$   t d}t d|d}|| |< d S Nr   r   r   r   )r   intor   r   r   r   r   use_shfl_sync_with_val'   r   r(   c                 C   &   t d}t d| | }|||< d S r   )r   r   all_syncary_inary_outr   predr   r   r   use_vote_sync_all-      
r/   c                 C   r)   r   )r   r   any_syncr+   r   r   r   use_vote_sync_any3   r0   r2   c                 C   r)   r   )r   r   eq_syncr+   r   r   r   use_vote_sync_eq9   r0   r4   c                 C   s    t jj}t dd}|| |< d S )Nr   Tr   	threadIdxxballot_sync)r   r   ballotr   r   r   use_vote_sync_ballot?   s   r:   c                 C   r)   r   )r   r   match_any_sync)r,   r-   r   r9   r   r   r   use_match_any_syncE   r0   r<   c                 C   s2   t d}t d| | \}}|r|nd||< d S r&   )r   r   match_all_sync)r,   r-   r   r9   r.   r   r   r   use_match_all_syncK   s   
r>   c                 C   sz   t jj}|d dkrt dd}n&|d dkrt dd}n|d dkr+t dd}n|d d	kr7t d
d}|| |< d S )N   r   Tr   """"   DDDD       r5   )arrr   r9   r   r   r   use_independent_schedulingQ   s   rG   c                 C   s   t jrdS t j| kS )NT)r
   ENABLE_CUDASIMr   get_current_devicecompute_capability)ccr   r   r   _safe_cc_check^   s   rL   z2Warp Operations are not yet implemented on cudasimc                   @   s   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Zeedddd Zeedddd Zeedddd Zdd Zd d! Zd"S )#TestCudaWarpOperationsc                 C   sJ   t dt}d}tj|tjd}|d|f | | t|dk d S )Nzvoid(int32[:])    dtyper   r   )r   jitr   npemptyr   
assertTrueallselfcompilednelemr   r   r   r   test_useful_syncwarpg   s
   z+TestCudaWarpOperations.test_useful_syncwarpc                 C   sP   t dt}d}d}tj|tjd}|d|f || | t||k d S Nvoid(int32[:], int32)rN   r?   rO   r   )r   rQ   r   rR   rS   r   rT   rU   )rW   rX   rY   r   r   r   r   r   test_shfl_sync_idxn   s   z)TestCudaWarpOperations.test_shfl_sync_idxc                 C   st   t dt}d}d}tj|tjd}tj|tjd}||d   |8  < |d|f || | t||k d S r[   )	r   rQ   r    rR   rS   r   arangerT   rU   rW   rX   rY   r   r   expr   r   r   test_shfl_sync_upv   s   z(TestCudaWarpOperations.test_shfl_sync_upc                 C   sv   t dt}d}d}tj|tjd}tj|tjd}|d |   |7  < |d|f || | t||k d S r[   )	r   rQ   r"   rR   rS   r   r^   rT   rU   r_   r   r   r   test_shfl_sync_down   s   z*TestCudaWarpOperations.test_shfl_sync_downc                 C   sd   t dt}d}d}tj|tjd}tj|tjd|A }|d|f || | t||k d S )Nr\   rN      rO   r   )	r   rQ   r%   rR   rS   r   r^   rT   rU   )rW   rX   rY   r$   r   r`   r   r   r   test_shfl_sync_xor   s   z)TestCudaWarpOperations.test_shfl_sync_xorc                 C   s   t tttf}t dtdttjttjf}t||D ].\}}t|d d  |ft	}d}tj
||jd}|d|f || | t||k qd S )Nl        rN   rO   r   )r   r   r   r   rR   pizipr   rQ   r(   rS   rP   rT   rU   )rW   typesvaluestypr   rX   rY   r   r   r   r   test_shfl_sync_types   s   z+TestCudaWarpOperations.test_shfl_sync_typesc                 C   s   t dt}d}tj|tjd}tj|tjd}|d|f || | t|dk d|d< |d|f || | t|dk d S )Nvoid(int32[:], int32[:])rN   rO   r   r   re   )	r   rQ   r/   rR   onesr   rS   rT   rU   rW   rX   rY   r,   r-   r   r   r   test_vote_sync_all   s   z)TestCudaWarpOperations.test_vote_sync_allc                 C   s   t dt}d}tj|tjd}tj|tjd}|d|f || | t|dk d|d< d|d< |d|f || | t|dk d S )Nrl   rN   rO   r   r   rB      )	r   rQ   r2   rR   zerosr   rS   rT   rU   rn   r   r   r   test_vote_sync_any   s   z)TestCudaWarpOperations.test_vote_sync_anyc                 C   s   t dt}d}tj|tjd}tj|tjd}|d|f || | t|dk d|d< |d|f || | t|dk d|d d < |d|f || | t|dk d S )Nrl   rN   rO   r   r   )	r   rQ   r4   rR   rq   r   rS   rT   rU   rn   r   r   r   test_vote_sync_eq   s   z(TestCudaWarpOperations.test_vote_sync_eqc                 C   sP   t dt}d}tj|tjd}|d|f | | t|tdk d S )Nvoid(uint32[:])rN   rO   r   r   )r   rQ   r:   rR   rS   uint32rT   rU   rV   r   r   r   test_vote_sync_ballot   s
   z,TestCudaWarpOperations.test_vote_sync_ballot)   r   z-Matching requires at least Volta Architecturec                 C   sl   t dt}d}tj|tjdd }tj|tjd}tdd}|d|f || | t	||k d S )Nrl   
   rO   rB   )iU  i  rp   r   )
r   rQ   r<   rR   r^   r   rS   tilerT   rU   )rW   rX   rY   r,   r-   r`   r   r   r   test_match_any_sync   s   z*TestCudaWarpOperations.test_match_any_syncc                 C   s   t dt}d}tj|tjd}tj|tjd}|d|f || | t|dk d|d< |d|f || | t|dk d S )Nrl   rx   rO   r   i  r?   r   )	r   rQ   r>   rR   rq   r   rS   rT   rU   rn   r   r   r   test_match_all_sync   s   z*TestCudaWarpOperations.test_match_all_syncz;Independent scheduling requires at least Volta Architecturec                 C   sN   t dt}tjdtjd}tdd}|d | | t||k d S )Nrt   rN   rO   )r@   rA   rC   rE      r   rN   )	r   rQ   rG   rR   rS   ru   ry   rT   rU   )rW   rX   rF   r`   r   r   r   test_independent_scheduling   s
   z2TestCudaWarpOperations.test_independent_schedulingc                 C   sH   t jdd }tjdtjd}|d | tdd}tj|| d S )Nc                 S   s6   t d}|d dkrt  | |< d S t  | |< d S )Nr   rB   r   )r   r   
activemaskr7   r   r   r   r   use_activemask   s   
z>TestCudaWarpOperations.test_activemask.<locals>.use_activemaskrN   rO   r}   )iUUUUl   *UU rc   )r   rQ   rR   rq   ru   ry   testingassert_equal)rW   r   outexpectedr   r   r   test_activemask   s   
z&TestCudaWarpOperations.test_activemaskc                 C   sZ   t jdd }tjdtjd}|d | tjdd tdD tjd}tj|| d S )Nc                 S   s   t d}t  | |< d S )Nr   )r   r   lanemask_ltr   r   r   r   use_lanemask_lt  s   
z@TestCudaWarpOperations.test_lanemask_lt.<locals>.use_lanemask_ltrN   rO   r}   c                 S   s   g | ]}d | d qS )rB   r   r   ).0r   r   r   r   
<listcomp>  s    z;TestCudaWarpOperations.test_lanemask_lt.<locals>.<listcomp>)	r   rQ   rR   rq   ru   asarrayranger   r   )rW   r   r   r   r   r   r   test_lanemask_lt  s   
z'TestCudaWarpOperations.test_lanemask_ltN)__name__
__module____qualname__rZ   r]   ra   rb   rd   rk   ro   rr   rs   rv   r   
skipUnlessrL   rz   r{   r~   r   r   r   r   r   r   rM   e   s2    

	


	


rM   __main__)numpyrR   numbar   r   r   r   r   numba.cuda.testingr   r   r	   
numba.corer
   r   r   r    r"   r%   r(   r/   r2   r4   r:   r<   r>   rG   rL   rM   r   mainr   r   r   r   <module>   s0     .