o
    ig                      @   s&  d dl Z d dlmZ d dlmZmZ ddlmZ dd Zdd	 Z	d
d Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ Zd,d- Zd.d/ Zd0d1 Zd2d3d4d5d6d7d8d9d:d;d<d=d>d?d@ZdAdB ZG dCdD dDe Z!dEdF Z"dS )G    N)ir)cgutilstargetconfig   )nvvmc                 C   sN   dt | d }tt|tt|t|t|f}t| ||S )N___numba_atomic_i	_cas_hack)strr   FunctionTypeIntTypePointerTyper   get_or_insert_function)lmodisizefnamefnty r   Q/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/numba/cuda/nvvmutils.pydeclare_atomic_cas_int   s   r   c                 C   s   |  |||dd}| |dS )N	monotonicr   )cmpxchgextract_value)builderr   r   ptrcmpvaloutr   r   r   atomic_cmpxchg   s   r   c                 C   s6   d}t t  t t  dt  f}t| ||S )Nz#llvm.nvvm.atomic.load.add.f32.p0f32r   r   r
   	FloatTyper   r   r   r   r   r   r   r   r   declare_atomic_add_float32   s
   
r!   c                 C   sP   t   }|jdkrd}nd}tt tt t f}t	| ||S )N)   r   z#llvm.nvvm.atomic.load.add.f64.p0f64___numba_atomic_double_add)
r   ConfigStacktopcompute_capabilityr   r
   
DoubleTyper   r   r   )r   flagsr   r   r   r   r   declare_atomic_add_float64   s   

r)   c                 C   4   d}t t  t t  t  f}t| ||S )N___numba_atomic_float_subr   r    r   r   r   declare_atomic_sub_float32'   
   
r,   c                 C   r*   )N___numba_atomic_double_subr   r
   r'   r   r   r   r    r   r   r   declare_atomic_sub_float64.   r-   r0   c                 C   :   d}t t dt t dt df}t| ||S )Nz"llvm.nvvm.atomic.load.inc.32.p0i32    r   r
   r   r   r   r   r    r   r   r   declare_atomic_inc_int325   
   r4   c                 C   r1   )N___numba_atomic_u64_inc@   r3   r    r   r   r   declare_atomic_inc_int64<   r5   r8   c                 C   r1   )Nz"llvm.nvvm.atomic.load.dec.32.p0i32r2   r3   r    r   r   r   declare_atomic_dec_int32C   r5   r9   c                 C   r1   )N___numba_atomic_u64_decr7   r3   r    r   r   r   declare_atomic_dec_int64J   r5   r;   c                 C   r*   )N___numba_atomic_float_maxr   r    r   r   r   declare_atomic_max_float32Q   r-   r=   c                 C   r*   )N___numba_atomic_double_maxr/   r    r   r   r   declare_atomic_max_float64X   r-   r?   c                 C   r*   )N___numba_atomic_float_minr   r    r   r   r   declare_atomic_min_float32_   r-   rA   c                 C   r*   )N___numba_atomic_double_minr/   r    r   r   r   declare_atomic_min_float64f   r-   rC   c                 C   r*   )N___numba_atomic_float_nanmaxr   r    r   r   r   declare_atomic_nanmax_float32m   r-   rE   c                 C   r*   )N___numba_atomic_double_nanmaxr/   r    r   r   r   declare_atomic_nanmax_float64t   r-   rG   c                 C   r*   )N___numba_atomic_float_nanminr   r    r   r   r   declare_atomic_nanmin_float32{   r-   rI   c                 C   r*   )N___numba_atomic_double_nanminr/   r    r   r   r   declare_atomic_nanmin_float64   r-   rK   c                 C   s,   d}t t dt df}t| ||S )NcudaCGGetIntrinsicHandler7   r2   r   r
   r   r   r   r    r   r   r    declare_cudaCGGetIntrinsicHandle   s
   
rN   c                 C   s4   d}t t dt dt df}t| ||S )NcudaCGSynchronizer2   r7   rM   r    r   r   r   declare_cudaCGSynchronize   s
   rP   c                 C   s`   | j jj}t|dd }tj||jdtj	d}d|_
d|_||_| |ttddS )	Nzutf-8    _str)name	addrspaceinternalT   generic)basic_blockfunctionmoduler   make_bytearrayencodeadd_global_variabletyper   ADDRSPACE_CONSTANTlinkageglobal_constantinitializeraddrspacecastr   r   r   )r   valuer   cvalglr   r   r   declare_string   s   
rg   c                 C   s8   t t d}t t d||g}t| |d}|S )NrV   r2   vprintf)r   r   r   r
   r   r   )r   	voidptrty	vprintftyrh   r   r   r   declare_vprint   s   rk   zllvm.nvvm.read.ptx.sreg.tid.xzllvm.nvvm.read.ptx.sreg.tid.yzllvm.nvvm.read.ptx.sreg.tid.zzllvm.nvvm.read.ptx.sreg.ntid.xzllvm.nvvm.read.ptx.sreg.ntid.yzllvm.nvvm.read.ptx.sreg.ntid.zzllvm.nvvm.read.ptx.sreg.ctaid.xzllvm.nvvm.read.ptx.sreg.ctaid.yzllvm.nvvm.read.ptx.sreg.ctaid.zz llvm.nvvm.read.ptx.sreg.nctaid.xz llvm.nvvm.read.ptx.sreg.nctaid.yz llvm.nvvm.read.ptx.sreg.nctaid.zz llvm.nvvm.read.ptx.sreg.warpsizezllvm.nvvm.read.ptx.sreg.laneid)ztid.xztid.yztid.zzntid.xzntid.yzntid.zzctaid.xzctaid.yzctaid.zznctaid.xznctaid.yznctaid.zwarpsizelaneidc                 C   s6   | j }ttdd}t||t| }| |dS )Nr2   r   )rZ   r   r
   r   r   r   SREG_MAPPINGcall)r   rS   rZ   r   fnr   r   r   	call_sreg   s   rq   c                   @   s<   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd ZdS )SRegBuilderc                 C   s
   || _ d S N)r   )selfr   r   r   r   __init__   s   
zSRegBuilder.__init__c                 C      t | jd| S )Nztid.%srq   r   rt   xyzr   r   r   tid      zSRegBuilder.tidc                 C   rv   )Nzctaid.%srw   rx   r   r   r   ctaid   r{   zSRegBuilder.ctaidc                 C   rv   )Nzntid.%srw   rx   r   r   r   ntid   r{   zSRegBuilder.ntidc                 C   rv   )Nz	nctaid.%srw   rx   r   r   r   nctaid   r{   zSRegBuilder.nctaidc                 C   sb   t d}| j| ||}| j| ||}| j| ||}| j| j|||}|S )Nr7   )	r   r   r   sextrz   r}   r|   addmul)rt   ry   i64rz   r}   r~   resr   r   r   getdim   s   
zSRegBuilder.getdimN)	__name__
__module____qualname__ru   rz   r|   r}   r~   r   r   r   r   r   rr      s    rr   c                    s@   t |   fdddD }tt|d |}|dkr|d S |S )Nc                 3   s    | ]}  |V  qd S rs   )r   ).0ry   sregr   r   	<genexpr>   s    z get_global_id.<locals>.<genexpr>ry   r   r   )rr   list	itertoolsislice)r   dimitseqr   r   r   get_global_id   s   r   )#r   llvmliter   
numba.corer   r   cudadrvr   r   r   r!   r)   r,   r0   r4   r8   r9   r;   r=   r?   rA   rC   rE   rG   rI   rK   rN   rP   rg   rk   rn   rq   objectrr   r   r   r   r   r   <module>   sX    	