o
    Y۷i.                  	   @   s  d dl Zd dlmZmZmZmZ d dlmZm	Z	 d dl
mZ eg Zejdejdd ZeejdejdddZejd	ejdd
d
d
d d ZejdejdZejg defdefgdZejddgdefdefgdZejddgejdejfdejfdejfdejfdejfgdddZdd Zdd Zdd Zdd  Z d!d" Z!d#d$ Z"d%d& Z#d'd( Z$G d)d* d*e	Z%e&d+kre'  dS dS ),    N)cuda	complex64int32float64)unittestCUDATestCase)ENABLE_CUDASIM
   dtypeg       @d   }      y              ?y               @   xy)      ?   )g      @   )   r   r   l   >[=    )r   r      l   ^} r	   abzT)r   alignc                 C   &   t jt}t d}t|| |< d S Nr   )r   const
array_likeCONST_EMPTYgridlenACi r'   [/home/ubuntu/vllm_env/lib/python3.10/site-packages/numba/cuda/tests/cudapy/test_constmem.pycuconstEmpty"      
r)   c                 C   s*   t jt}t d}|| d | |< d S )Nr   r   )r   r   r   CONST1Dr!   r#   r'   r'   r(   cuconst(   s   
r,   c                 C   s2   t jt}t d\}}|||f | ||f< d S )Nr   )r   r   r   CONST2Dr!   )r$   r%   r&   jr'   r'   r(   	cuconst2d0   s   r/   c                 C   s@   t jt}t jj}t jj}t jj}||||f | |||f< d S )N)r   r   r   CONST3D	threadIdxr   r   r   )r$   r%   r&   r.   kr'   r'   r(   	cuconst3d6   s
   r3   c                 C   r   r   )r   r   r   CONST_RECORD_EMPTYr!   r"   r#   r'   r'   r(   cuconstRecEmpty>   r*   r5   c                 C   s:   t jt}t d}|| d | |< || d ||< d S )Nr   r   r   )r   r   r   CONST_RECORDr!   )r$   Br%   r&   r'   r'   r(   
cuconstRecD   s   
r8   c                 C   sj   t jt}t d}|| d | |< || d ||< || d ||< || d ||< || d ||< d S )Nr   r   r   r   r   r   )r   r   r   CONST_RECORD_ALIGNr!   )r$   r7   r%   DEZr&   r'   r'   r(   cuconstRecAlignK   s   
r=   c                 C   s:   t jt}t jt}t d}|| ||  | |< d S r   )r   r   r   CONST3BYTESr+   r!   )r   r   r   r&   r'   r'   r(   cuconstAlignU   s   
r?   c                   @   sL   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dS )TestCudaConstantMemoryc                 C   sj   t d d  f}t|t}tt}|d | | t|td k t	s3| 
d||d d S d S )N)r   r   r   zld.const.f64z'as we're adding to it, load as a double)r   r   jitr,   np
zeros_liker+   
assertTrueallr   assertIninspect_asm)selfsigjcuconstr$   r'   r'   r(   test_const_array]   s   
z'TestCudaConstantMemory.test_const_arrayc                 C   D   t dt}tjddtjd}|d | | t|dk d S Nzvoid(int64[:])r   
fill_valuer   )r   r   r   )r   rA   r)   rB   fullint64rD   rE   )rH   jcuconstEmptyr$   r'   r'   r(   test_const_emptyj      z'TestCudaConstantMemory.test_const_emptyc              	   C   sP   t dt}tjdtjtd}|d | | t|t	t
d d  k d S )Nzvoid(float64[:])r   rO   )r   r   )r   rA   r?   rB   rQ   nanfloatrD   rE   r>   r+   )rH   jcuconstAlignr$   r'   r'   r(   test_const_alignp   s   $z'TestCudaConstantMemory.test_const_alignc                 C   sr   t d d d d f f}t|t}tjtdd}|d | | t|tk t	s7| 
d||d d S d S )Nr%   order))r   r   )r   r   zld.const.u32zload the ints as ints)r   r   rA   r/   rB   rC   r-   rD   rE   r   rF   rG   )rH   rI   
jcuconst2dr$   r'   r'   r(   test_const_array_2dv   s   z*TestCudaConstantMemory.test_const_array_2dc                 C   s   t d d d d d d f f}t|t}tjtdd}|d | | t|tk t	s@|
|}d}d}| ||| d S d S )NFrZ   )r   )r   r   r   zld.const.v2.f32z&Load the complex as a vector of 2x f32)r   r   rA   r3   rB   rC   r0   rD   rE   r   rG   rF   )rH   rI   
jcuconst3dr$   asmcomplex_loaddescriptionr'   r'   r(   test_const_array_3d   s   
z*TestCudaConstantMemory.test_const_array_3dc                 C   rL   rM   )r   rA   r5   rB   rQ   rR   rD   rE   )rH   jcuconstRecEmptyr$   r'   r'   r(   test_const_record_empty   rU   z.TestCudaConstantMemory.test_const_record_emptyc                 C   sd   t jdtd}t jdtd}tt||}|d || t j	|t
d  t j	|t
d  d S )Nr   r
   r   r   r   r   )rB   zerosrW   intr   rA   r8   
specializetestingassert_allcloser6   )rH   r$   r7   rJ   r'   r'   r(   test_const_record   s   z(TestCudaConstantMemory.test_const_recordc                 C   s   t jdt jd}t jdt jd}t jdt jd}t jdt jd}t jdt jd}tt|||||}|d ||||| t j|t	d  t j|t	d  t j|t	d  t j|t	d  t j|t	d  d S )	Nr   r
   rf   r   r   r   r   r   )
rB   rg   r   r   rA   r=   ri   rj   rk   r9   )rH   r$   r7   r%   r:   r;   rJ   r'   r'   r(   test_const_record_align   s   z.TestCudaConstantMemory.test_const_record_alignN)__name__
__module____qualname__rK   rT   rY   r]   rc   re   rl   rm   r'   r'   r'   r(   r@   \   s    	r@   __main__)(numpyrB   numbar   r   r   r   numba.cuda.testingr   r   numba.core.configr   arrayr    aranger+   asfortranarrayreshaper-   r0   uint8r>   rW   rh   r4   r6   r   uint32r9   r)   r,   r/   r3   r5   r8   r=   r?   r@   rn   mainr'   r'   r'   r(   <module>   sZ    

S