o
    ۾i                     @   sX   d Z ddlZddlmZmZ ddlmZ edG dd deZedkr*e	  dS dS )	a  
Matrix multiplication example via `cuda.jit`.

Reference: https://stackoverflow.com/a/64198479/13697228 by @RobertCrovella

Contents in this file are referenced from the sphinx-generated docs.
"magictoken" is used for markers as beginning and ending of example text.
    N)CUDATestCaseskip_on_cudasim)captured_stdoutz4cudasim doesn't support cuda import at non-top-levelc                       s4   e Zd ZdZ fddZ fddZdd Z  ZS )
TestMatMulzo
    Text matrix multiplication using simple, shared memory/square, and shared
    memory/nonsquare cases.
    c                    s    t  | _| j  t   d S N)r   _captured_stdout	__enter__supersetUpself	__class__ ]/home/ubuntu/.local/lib/python3.10/site-packages/numba/cuda/tests/doc_examples/test_matmul.pyr
      s   
zTestMatMul.setUpc                    s   | j d d d  t   d S r   )r   __exit__r	   tearDownr   r   r   r   r      s   zTestMatMul.tearDownc                    s  ddl mm ddl}ddl}jfdd}|dddg}|ddg}|	ddg}
|}
|}
|}	d}
||jd |
d  }||jd	 |
d	  }||f}|||
f |||	 |	 }t| t||  d j fd
d}|dddg}|ddg}|	ddg}
|}
|}
|}	  f}
||jd |
d  }||jd	 |
d	  }||f}|||
f |||	 |	 }t| t||  d}| j|||| k|d |dddg}|ddg}|	ddg}
|}
|}
|}	  f}
t|jd |jd }t|jd	 |jd	 }|||
d  }|||
d	  }||f}|||
f |||	 |	 }t| t||  d}| j|||| k|d dS )z/Test of matrix multiplication on various cases.r   )cudafloat32Nc                    sv     d\}}||jd k r7||jd k r9d}t| jd D ]}|| ||f |||f  7 }q||||f< dS dS dS )z2Perform square matrix multiplication of C = A * B.   r              N)gridshaperange)ABCijtmpk)r   r   r   matmul)   s   z)TestMatMul.test_ex_matmul.<locals>.matmul      )r#   r#   r   c                    sj  j j  fd}j j  fd}d\}}jj}jj}jj}	d}
t|	D ]k}d|||f< d|||f< || jd k r]||   | jd k r]| |||   f |||f< ||jd k r}||   |jd k r}|||   |f |||f< 	  t D ]}|
|||f |||f  7 }
q	  q/||jd k r||jd k r|
|||f< dS dS dS )z
            Perform matrix multiplication of C = A * B using CUDA shared memory.

            Reference: https://stackoverflow.com/a/64198479/13697228 by @RobertCrovella
            )r   dtyper   r   r   r   N)
sharedarrayr   	threadIdxxygridDimr   r   syncthreads)r   r   r   sAsBr)   r*   txtybpgr    r   r   TPBr   r   r   r   fast_matmulN   s*   	$$
z.TestMatMul.test_ex_matmul.<locals>.fast_matmulz5fast_matmul incorrect for shared memory, square case.)msgs            z9fast_matmul incorrect for shared memory, non-square case.)numbar   r   numpymathjitarangereshapeoneszeros	to_deviceceilr   copy_to_hostprint
assertTrueallmax)r   npr<   r"   x_hy_hz_hx_dy_dz_dthreadsperblockblockspergrid_xblockspergrid_yblockspergridr4   r5   
grid_y_max
grid_x_maxr   r2   r   test_ex_matmul    sp   


+





 zTestMatMul.test_ex_matmul)__name__
__module____qualname____doc__r
   r   rV   __classcell__r   r   r   r   r      s
    r   __main__)
rZ   unittestnumba.cuda.testingr   r   numba.tests.supportr   r   rW   mainr   r   r   r   <module>   s     