o
    ¦¸¢i  ã                   @   sÔ   d dl Zd dlmZmZmZ d dlmZmZm	Z	 d dl
mZ dd„ Zdd„ Zd	d
„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd „ ZG d!d"„ d"e	ƒZed#krhe ¡  dS dS )$é    N)ÚcudaÚint32Úfloat32)Úskip_on_cudasimÚunittestÚCUDATestCase)ÚENABLE_CUDASIMc                 C   ó   t  d¡}t  ¡  || |< d S ©Né   )r   ÚgridÚsyncthreads©ÚaryÚi© r   ú_/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/numba/cuda/tests/cudapy/test_sync.pyÚuseless_syncthreads   ó   
r   c                 C   r	   r
   ©r   r   Úsyncwarpr   r   r   r   Úuseless_syncwarp   r   r   c                 C   s    t  d¡}t  d¡ || |< d S )Nr   éÿÿ  r   r   r   r   r   Úuseless_syncwarp_with_mask   s   

r   c                 C   sô   t j dt¡}t  d¡}|||< t  ¡  |dk r)|| ||d   ||< t  d¡ |dk r>|| ||d   ||< t  d¡ |dk rS|| ||d   ||< t  d¡ |d	k rh|| ||d	   ||< t  d
¡ |dkrx|d |d  | d< d S d S )Né    r   é   r   é   éÿ   é   é   é   é   r   )r   ÚsharedÚarrayr   r   r   )ÚresÚsmr   r   r   r   Úcoop_syncwarp   s&   




ÿr&   c                 C   sR   d}t j |t¡}t  d¡}|dkrt|ƒD ]}|||< qt  ¡  || | |< d S )Néd   r   r   )r   r"   r#   r   r   Úranger   )r   ÚNr%   r   Újr   r   r   Úsimple_smem4   s   

r+   c                 C   sT   t  d¡\}}t j dt¡}|d |d  |||f< t  ¡  |||f | ||f< d S )Nr    ©é
   é   r   ©r   r   r"   r#   r   r   )r   r   r*   r%   r   r   r   Úcoop_smem2d?   s
   r0   c                 C   s<   t  d¡}t j dt¡}|d ||< t  ¡  || | |< d S )Nr   r   r    r/   )r   r   r%   r   r   r   Údyn_shared_memoryG   s
   
r1   c                 C   ó,   | d  d7  < t  ¡  | d  d7  < d S ©Nr   é{   iA  )r   Úthreadfence©r   r   r   r   Úuse_threadfenceO   ó   r7   c                 C   r2   r3   )r   Úthreadfence_blockr6   r   r   r   Úuse_threadfence_blockU   r8   r:   c                 C   r2   r3   )r   Úthreadfence_systemr6   r   r   r   Úuse_threadfence_system[   r8   r<   c                 C   ó    t  d¡}t  | | ¡||< d S r
   )r   r   Úsyncthreads_count©Úary_inÚary_outr   r   r   r   Úuse_syncthreads_counta   ó   
rB   c                 C   r=   r
   )r   r   Úsyncthreads_andr?   r   r   r   Úuse_syncthreads_andf   rC   rE   c                 C   r=   r
   )r   r   Úsyncthreads_orr?   r   r   r   Úuse_syncthreads_ork   rC   rG   c                 C   s   t rdS t ¡ j| kS )NT)r   r   Úget_current_deviceÚcompute_capability)Úccr   r   r   Ú_safe_cc_checkp   s   rK   c                   @   sü   e Zd Zdd„ Zdd„ Zedƒdd„ ƒZedƒe e	dƒd	¡d
d„ ƒƒZ
edƒe e	dƒd	¡dd„ ƒƒZdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zd d!„ Zd"d#„ Zd$d%„ Zd&d'„ Zd(d)„ Zd*d+„ Zd,d-„ Zd.d/„ Zd0d1„ Zd2S )3ÚTestCudaSyncc                 C   sT   t  d¡|ƒ}d}tj|tjd}tj|tjd}|d|f |ƒ tj ||¡ d S )Núvoid(int32[::1])r-   ©Údtyper   )r   ÚjitÚnpÚemptyr   ÚarangeÚtestingÚassert_equal)ÚselfÚkernelÚcompiledÚnelemr   Úexpr   r   r   Ú_test_uselessx   s   zTestCudaSync._test_uselessc                 C   ó   |   t¡ d S ©N)r[   r   ©rV   r   r   r   Útest_useless_syncthreads€   s   z%TestCudaSync.test_useless_syncthreadsz#syncwarp not implemented on cudasimc                 C   r\   r]   )r[   r   r^   r   r   r   Útest_useless_syncwarpƒ   s   z"TestCudaSync.test_useless_syncwarp)é   r   z'Partial masks require CC 7.0 or greaterc                 C   r\   r]   )r[   r   r^   r   r   r   Útest_useless_syncwarp_with_mask‡   s   z,TestCudaSync.test_useless_syncwarp_with_maskc                 C   sP   d}d}d}t  d¡tƒ}tjdtjd}|||f |ƒ tj ||d ¡ d S )Nið  r   r   rM   rN   r   )r   rP   r&   rQ   Úzerosr   rT   rU   )rV   ÚexpectedÚnthreadsÚnblocksrX   r$   r   r   r   Útest_coop_syncwarp   s   zTestCudaSync.test_coop_syncwarpc              	   C   sV   t  d¡tƒ}d}tj|tjd}|d|f |ƒ |  t |tj|tjdk¡¡ d S )NrM   r'   rN   r   )	r   rP   r+   rQ   rR   r   Ú
assertTrueÚallrS   )rV   rX   rY   r   r   r   r   Útest_simple_smemœ   s
   $zTestCudaSync.test_simple_smemc                 C   s’   t  d¡tƒ}d}tj|tjd}|d|f |ƒ t |¡}t|jd ƒD ]}t|jd ƒD ]}|d |d  |||f< q.q%|  	t 
||¡¡ d S )Nzvoid(float32[:,::1])r,   rN   r   r   )r   rP   r0   rQ   rR   r   Ú
empty_liker(   Úshaperh   Úallclose)rV   rX   rl   r   rZ   r   r*   r   r   r   Útest_coop_smem2d£   s   
ÿzTestCudaSync.test_coop_smem2dc              
   C   sf   t  d¡tƒ}d}tj|tjd}|d|d|jd f |ƒ |  t |dtj	|jtj
d k¡¡ d S )Nzvoid(float32[::1])é2   rN   r   r   r   r    )r   rP   r1   rQ   rR   r   Úsizerh   ri   rS   r   )rV   rX   rl   r   r   r   r   Útest_dyn_shared_memory®   s
   *z#TestCudaSync.test_dyn_shared_memoryc                 C   óf   t d d … f}t |¡tƒ}tjdtj d}|d |ƒ |  d|d ¡ ts1|  d| 	|¡¡ d S d S )Nr-   rN   ©r   r   é¼  r   z
membar.gl;)
r   r   rP   r7   rQ   rc   ÚassertEqualr   ÚassertInÚinspect_asm©rV   ÚsigrX   r   r   r   r   Útest_threadfence_codegenµ   ó   ÿz%TestCudaSync.test_threadfence_codegenc                 C   rr   )Nr-   rN   rs   rt   r   zmembar.cta;)
r   r   rP   r:   rQ   rc   ru   r   rv   rw   rx   r   r   r   Útest_threadfence_block_codegen¿   r{   z+TestCudaSync.test_threadfence_block_codegenc                 C   rr   )Nr-   rN   rs   rt   r   zmembar.sys;)
r   r   rP   r<   rQ   rc   ru   r   rv   rw   rx   r   r   r   Útest_threadfence_system_codegenÉ   r{   z,TestCudaSync.test_threadfence_system_codegenc                 C   s^   t  t¡}tjd|d}tjdtjd}d|d< d|d< |d ||ƒ |  t |dk¡¡ d S )NéH   rN   r   é   é*   )r   r~   éF   )	r   rP   rB   rQ   Úonesrc   r   rh   ri   )rV   Úin_dtyperX   r@   rA   r   r   r   Ú_test_syncthreads_countÓ   s   
z$TestCudaSync._test_syncthreads_countc                 C   ó   |   tj¡ d S r]   )r„   rQ   r   r^   r   r   r   Útest_syncthreads_countÜ   ó   z#TestCudaSync.test_syncthreads_countc                 C   r…   r]   )r„   rQ   Úint16r^   r   r   r   Útest_syncthreads_count_upcastß   r‡   z*TestCudaSync.test_syncthreads_count_upcastc                 C   r…   r]   )r„   rQ   Úint64r^   r   r   r   Útest_syncthreads_count_downcastâ   r‡   z,TestCudaSync.test_syncthreads_count_downcastc                 C   s„   t  t¡}d}tj||d}tj|tjd}|d|f ||ƒ |  t |dk¡¡ d|d< |d|f ||ƒ |  t |dk¡¡ d S ©Nr'   rN   r   r   r   )	r   rP   rE   rQ   r‚   rc   r   rh   ri   ©rV   rƒ   rX   rY   r@   rA   r   r   r   Ú_test_syncthreads_andå   ó   
z"TestCudaSync._test_syncthreads_andc                 C   r…   r]   )rŽ   rQ   r   r^   r   r   r   Útest_syncthreads_andð   r‡   z!TestCudaSync.test_syncthreads_andc                 C   r…   r]   )rŽ   rQ   rˆ   r^   r   r   r   Útest_syncthreads_and_upcastó   r‡   z(TestCudaSync.test_syncthreads_and_upcastc                 C   r…   r]   )rŽ   rQ   rŠ   r^   r   r   r   Útest_syncthreads_and_downcastö   r‡   z*TestCudaSync.test_syncthreads_and_downcastc                 C   s„   t  t¡}d}tj||d}tj|tjd}|d|f ||ƒ |  t |dk¡¡ d|d< |d|f ||ƒ |  t |dk¡¡ d S rŒ   )r   rP   rG   rQ   rc   r   rh   ri   r   r   r   r   Ú_test_syncthreads_orù   r   z!TestCudaSync._test_syncthreads_orc                 C   r…   r]   )r“   rQ   r   r^   r   r   r   Útest_syncthreads_or  r‡   z TestCudaSync.test_syncthreads_orc                 C   r…   r]   )r“   rQ   rˆ   r^   r   r   r   Útest_syncthreads_or_upcast  r‡   z'TestCudaSync.test_syncthreads_or_upcastc                 C   r…   r]   )r“   rQ   rŠ   r^   r   r   r   Útest_syncthreads_or_downcast
  r‡   z)TestCudaSync.test_syncthreads_or_downcastN)Ú__name__Ú
__module__Ú__qualname__r[   r_   r   r`   r   Ú
skipUnlessrK   rb   rg   rj   rn   rq   rz   r|   r}   r„   r†   r‰   r‹   rŽ   r   r‘   r’   r“   r”   r•   r–   r   r   r   r   rL   w   sB    

ÿ
ÿ


	rL   Ú__main__)ÚnumpyrQ   Únumbar   r   r   Únumba.cuda.testingr   r   r   Únumba.core.configr   r   r   r   r&   r+   r0   r1   r7   r:   r<   rB   rE   rG   rK   rL   r—   Úmainr   r   r   r   Ú<module>   s.     ÿ