o
    i                     @   s  U d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
mZ d dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ e ZdaejdadZdaG dd de Z!G dd de Z"G dd de Z#d^ddZ$e% dd Z&dee' fddZ(dee' fddZ)dd Z*e% dd Z+d Z,e% d!d" Z-e% d#d$ Z.ej%d%d&d'd( Z/ej%d%d&d^d)d*Z0d+d, Z1d-d. Z2d/d0 Z3d1d2 Z4e4d3d4Z5d4a6d5d6 Z7d7d8 Z8e9e8d9Z:	;		4d_d<d=Z;		>	d`d?d@Z<dAdB Z=ej>?dCZ@dDdE ZAi aBeCeDdF< 		Gdad4ddd4dHdIdJZE	G		4dbdKdLZFG dMdN dNe ZGG dOdP dPeHZIdQdR ZJd^dSdTZKdUdV ZLdWdX ZMdaNdYdZ ZO		4	%dcd\d]ZPdS )d    N)Optional)device)function)get_rocm_path)driver)runtimenvrtc)_environment)_utilwin32)
--device-cz-dcz	-rdc=truez--relocatable-device-code=truec                   @      e Zd ZdS )NVCCExceptionN__name__
__module____qualname__ r   r   O/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/cupy/cuda/compiler.pyr   !       r   c                   @   r   )HIPCCExceptionNr   r   r   r   r   r   %   r   r   c                   @   r   )JitifyExceptionNr   r   r   r   r   r   )   r   r   c           
   
   C   s  z>t j}tr"t }|d ur"|t j t jdd }t|}||d< tj	| ||tj
dtr/tjndd}|d ur<|| |W S  tjyj } zd||j|j|j}	|dkrZt|	|dkrbt|	t|	d }~w ty } zd	t| }	t|	|d }~ww )
NPATH Tr   )cwdenvstderruniversal_newlinescreationflagsz^`{0}` command returns non-zero exit status. 
command: {1}
return-code: {2}
stdout/stderr: 
{3}nvcchipccz>Failed to run `{0}` command. Check PATH environment variable: )osenviron_win32_get_extra_path_for_msvcpathsepgetcopydeepcopy
subprocesscheck_outputSTDOUTCREATE_NO_WINDOWwriteCalledProcessErrorformatcmd
returncodeoutputr   r   RuntimeErrorOSErrorstr)
r1   r   backend
log_streamr   
extra_pathpathlogemsgr   r   r   _run_cc-   sL   

r>   c                  C   s2   t d} | r	d S t }|r|S t }|r|S d S )Ncl.exe)shutilwhich_get_cl_exe_dir_get_cl_exe_dir_fallback)cl_exe
cl_exe_dirr   r   r   r%   ^   s   
r%   returnc               
   C   s   z=zdd l } W n ty   Y W d S w | jt j}|D ]}tj	|d}tj
|r2|  W S qtd|  W d S  ty] } ztdt| d|  W Y d }~d S d }~ww )Nr   r?   zcl.exe could not be found in z,Failed to find cl.exe with setuptools.msvc: : )setuptools.msvc	ExceptionmsvcEnvironmentInfoplatformmachineVCToolsr"   r:   joinexistswarningswarntype)
setuptoolsvctoolsr:   rD   r<   r   r   r   rB   p   s*   
rB   c               
   C   s   z&ddl m}  ddlm} || ddi}|  |j  tj	|jj
W S  tyF } ztdt| d|  W Y d }~d S d }~ww )Nr   )Distribution)	build_extnamecupy_cl_exe_discoverz'Failed to find cl.exe with setuptools: rG   )rT   rV   setuptools.command.build_extrW   setup_shlib_compilershlib_compiler
initializer"   r:   dirnameccrI   rQ   rR   rS   )rV   rW   extr<   r   r   r   rC      s   
rC   c                   C   s   t d u rt a t S N)_nvrtc_versionr	   
getVersionr   r   r   r   _get_nvrtc_version   s   rd   c                  C   s   ddl m}  | jS )Nr   core)
cupy._corerf   CUPY_CACHE_KEYre   r   r   r   _get_cupy_cache_key   s   ri   )3253627287c                  C   sz   t  \} }| dk rd}|S | dkr|dkrd}|S | dkr%|dk r%d}|S | dkr-|dks5| dkr9|dk r9d}|S d	}|S )
N   75r   80   86   90120)rd   )majorminornvrtc_max_compute_capabilityr   r   r   _get_max_compute_capability   s   
 rz   c                  C   s$   t  \} }tdd t| |D S )Nc                 s   s    | ]}d | V  qdS )-INr   ).0dr   r   r   	<genexpr>   s
    
z._get_extra_include_dir_opts.<locals>.<genexpr>)rd   tupler
   $_get_include_dir_from_conda_or_wheel)rw   rx   r   r   r   _get_extra_include_dir_opts   s   
r   T)for_each_devicec                  C   s*   t  } t j}|tv r|S t|| tdS )N)key)rz   r   Devicecompute_capability_tegra_archsminint)ry   archr   r   r   	_get_arch   s
   
r   c                 C   s@   | d u rt  } tst| tt krd|  dfS d|  dfS )Nz	-arch=sm_cubinz-arch=compute_ptx)r   _use_ptxr   rz   r   r   r   r   _get_arch_for_options_for_nvrtc   s   r   c                 C      t dd | D S )Nc                 s   s    | ]	}|t v r|V  qd S ra   
_rdc_flagsr|   or   r   r   r~          z'_is_cudadevrt_needed.<locals>.<genexpr>)anyoptionsr   r   r   _is_cudadevrt_needed      r   c                  C   sv   t d urt S ddlm}  |  }|d u rtdtr|d7 }n|d }tj|s-|d7 }n|}tj|s9td|S )Nr   )get_cuda_pathzCUDA is not found.z/lib/x64/cudadevrt.libz/lib64/libcudadevrt.az/lib/libcudadevrt.az>Relocatable PTX code is requested, but cudadevrt is not found.)
_cudadevrt	cupy.cudar   r4   r$   r"   r:   isfile)r   	cudadevrtcudadevrt64r   r   r   _get_cudadevrt_path   s"   

r   c                 C   r   )Nc                 s   s    | ]	}|t vr|V  qd S ra   r   r   r   r   r   r~     r   z%_remove_rdc_option.<locals>.<genexpr>)r   r   r   r   r   _remove_rdc_option
  r   r   c                 C   sH   t j| }|d u st|dkr|S zt|dkW S  ty#   Y dS w )Nr      F)r"   r#   r'   lenr   
ValueError)rX   defaultvalr   r   r   _get_bool_env_variable  s   r   CUPY_COMPILE_WITH_PTXFc              
   C   s   ddl m} tsddlm} |  ||  da| }|d |  } z|| |\}}}}W n* tyY }	 zt	t
|	|||d}
tdd}|rN|
tj tt
|
|	d }	~	ww ||ks`J |||fS )	Nr   )jitifyre   T
r   CUPY_DUMP_CUDA_SOURCE_ON_ERRORF)r   r   #_jitify_header_source_map_populatedrg   rf   _init_module_add_sources_get_header_source_maprI   CompileExceptionr6   r   dumpsysr   r   )sourcer   cu_pathr   rf   
old_sourcerX   headersinclude_namesr<   cexr   r   r   r   _jitify_prep  s,   

r   c                 C   s   t j| dd S )NF)usedforsecurity)hashlibsha1	hexdigest)valuer   r   r   _hash_hexdigestA  r   r       r   kern.cuc              	      s    fdd}|sHt  2}	tj|	|}
t|
d}||  W d    n1 s)w   Y  || ||
|||W  d    S 1 sAw   Y  d S |sLdn|}
|| ||
|||S )Nc              
      s   t jst \}}||f7 }nd}|rt| ||\}}}	nd }}	t \}
}|
dkr.|d7 }t| |||	||d}z|||\}}W ||fS  ty_ } ztdd}|rZ|	t
j  d }~ww )Nr   r   rt   )z#--device-as-default-execution-space)name_expressionsmethodr   F)r   is_hipr   r   rd   _NVRTCProgramcompiler   r   r   r   r   )r   r   r   r   r8   r   arch_optr   r   r   major_versionminor_versionprogcompiled_objmappingr<   r   r   r   r   _compileK  s6   

z%compile_using_nvrtc.<locals>._compilewr   )tempfileTemporaryDirectoryr"   r:   rO   openr.   )r   r   r   filenamer   r8   cache_in_memoryr   r   root_dirr   cu_filer   r   r   compile_using_nvrtcH  s    
$	
r   r   c                 C   s.  ddl m} |st }|dvrtd|dkr|rJ dj|d}| }	|	 }
|
| t Z}|dd }t	j
||}d	| }d
||f }t|d}||  W d    n1 s`w   Y  |s|
d|  |
t|7 }
|
| z	t|
|d| W n ty } ztt|| ||d}tdd}|r|tj |d }~ww |
 }|d |d }|
t|d|f 7 }
|
| z	t|
|d| W n% ty } ztt|| ||d}tdd}|r|tj |d }~ww t|}|d|d|d f7 }|t| }
z	t|
|d| W n ty) } ztt|dd|d}|d }~ww |dkrQt|d}| W  d    W  d    S 1 sKw   Y  n+|dkrxt|d}| W  d    W  d    S 1 srw   Y  nJ |W d    d S W d    d S 1 sw   Y  d S )Nr   )get_nvcc_path)r   r   z,Invalid code_type %s. Should be cubin or ptxr   z'-gencode=arch=compute_{cc},code=sm_{cc})r_   .z%s.cuz%s.%sr   z--%sr    r   Fz--cubinz.o-oz--device-link.cubinr   rbr   )r   r   r   r   r0   splitappendr   r   r"   r:   rO   r   r.   listr>   r   r   r6   r   r   r   r   r(   r   read)r   r   r   r   	code_typeseparate_compilationr8   r   arch_str_nvccr1   r   
first_partr:   r   result_pathr   r<   r   r   cmd_partialobjptx_filebin_filer   r   r   compile_using_nvccz  s   




;
>>$r   c           	   
   C   s   |dkr4|d |f7 }t| }z	||\}}W nN ty3 } ztdd}|r.|tj  d }~ww |dkrbz|d }t| ||ddd	}W n  tya } ztdd}|r\|tj  d }~ww t	d
| t
|tsoJ ddd |  D S )Nr	   z-arch=compute_{}r   Fr    )r   zpreprocess.ptxzpreprocess.cur   )r   Invalid backend %sr   c                 s   s    | ]
}| d r|V  qdS )z//N
startswith)r|   xr   r   r   r~     s    

z_preprocess.<locals>.<genexpr>)r0   r   r   r   r   r   r   r   r   r   
isinstancebytesrO   decode
splitlines)	r   r   r   r7   r   result_r<   r   r   r   r   _preprocess  sD   



r   z~/.cupy/kernel_cachec                   C   s   t jdtS )NCUPY_CACHE_DIR)r"   r#   r'   _default_cache_dirr   r   r   r   get_cache_dir  s   r   _empty_file_preprocess_cacher	   )enable_cooperative_groupsr   r8   r   c                C   s   |r	t jr	td|d ur|dkrttddo|dk}
t jr3|dkr%dnd}t| ||||||||
	S t| |||||||||
|	S )Nz+Cooperative groups is not supported in HIP.r	   CUPY_CACHE_IN_MEMORYFhiprtcr!   )r   r   r   NotImplementedErrorr   _compile_with_cache_hip_compile_with_cache_cuda)r   r   r   	cache_dirextra_sourcer7   r   r   r8   r   r   r   r   r   _compile_module_with_cache  s*   
r  c               	   C   s*  |d u rt  }|d u rt }|d7 }|r|d7 }tddr!|d7 }d|v }|
r.|s.|d7 }n|r4|
s4d}
|
r>|d	kr>td
|t 7 }||t |ft| }t|d }|d u rct	d|||}|t|< d||| |t
 f }|d}t|d }t }|	stj|stj|dd tj||}tj|r|st|d}| }W d    n1 sw   Y  t|tkr|d t }|td  }t|d}||kr|| |S n	 |d	kr|	rdn|d }t| ||||||	|
\}}t|rt }||d t }|| |  }n|}|!| n|dkr.t|}t"| |||d d||d}ntd| |	st|d}t#j$|dd}|%| |%| |j&}W d    n	1 s^w   Y  t'(|| tddrt|d d}|%|  W d    n	1 sw   Y  n	 || |S )N)z	-ftz=true)r   CUPY_CUDA_COMPILE_WITH_DEBUGF)z--device-debugz--generate-line-info-DCUPY_USE_JITIFY)r  Tr	   zjitify only works with NVRTCr   z%s %s %s %s %sutf-8r   exist_okr   ascii.cuzcupy.ptxr    r   )r   r   r8   r   dirdeleteCUPY_CACHE_SAVE_CUDA_SOURCEr   ))r   r   r   r   r   rd   r   r   r'   r   ri   encoder   r   Moduler"   r:   isdirmakedirsrO   rP   r   r   r   _hash_lengthloadr   r   	LinkStateadd_ptr_datar   add_ptr_filecomplete_set_mappingr   r   NamedTemporaryFiler.   rX   r@   move) r   r   r   r  r  r7   r   r   r8   r   r   is_jitify_requestedr   basekey_srcrX   modr:   filedatahashr   
cubin_hashcu_namer   r   lsr   rdctf	temp_pathfr   r   r   r     s   














r   c                       sF   e Zd Zd fdd	Zdd Zdd Zdd	 Zd
d Zdd Z  Z	S )r   r	   c                    s0   || _ || _|| _|| _|| _tt|   d S ra   )_msgr   rX   r   r7   superr   __init__)selfr=   r   rX   r   r7   	__class__r   r   r,    s   zCompileException.__init__c                 C   s    t | | j| j| j| j| jffS ra   )rS   r*  r   rX   r   r7   r-  r   r   r   
__reduce__  s   zCompileException.__reduce__c                 C   s   t | S ra   )r6   r0  r   r   r   __repr__     zCompileException.__repr__c                 C   s   |   S ra   )get_messager0  r   r   r   __str__  r3  zCompileException.__str__c                 C   s   | j S ra   )r*  r0  r   r   r   r4    s   zCompileException.get_messagec                 C   s   | j d}tttt|d }d|}|d| j	
  |d|  |d |d| j |dd	| j |d
 t|D ]\}}|||d |  d  qO|d |  d S )Nr   r   z
{{:0{}d}} z{} zcompilation error: {}
z-----
z	Name: {}
zOptions: {}
 zCUDA source:
)r   r   r   mathfloorlog10r   r0   r.   r7   upperrX   rO   r   	enumeraterstripflush)r-  r)  linesdigits	linum_fmtiliner   r   r   r     s   


"
zCompileException.dumpr   )
r   r   r   r,  r1  r2  r5  r4  r   __classcell__r   r   r.  r   r     s    r   c                   @   s2   e Zd Z		dddZejfddZdd	d
ZdS )r   default_programr   Nr   c                 C   s\   d | _ t|tr|d}t|tr|d}|| _|| _t||||| _ || _|| _	d S )NzUTF-8)
ptrr   r   r   srcrX   r	   createProgramr   r   )r-  rF  rX   r   r   r   r   r   r   r   r,    s   




z_NVRTCProgram.__init__c                 C   s$   | rd S | j rt| j  d S d S ra   )rE  r	   destroyProgram)r-  is_shutting_downr   r   r   __del__  s
   z_NVRTCProgram.__del__c              	   C   s   zZ| j r| j D ]	}t| j| qt| j| d }| j r.i }| j D ]}t| j|||< q"|d ur;|t| j | jdkrIt	| j|fW S | jdkrWt
| j|fW S td tjyx   t| j}t|| j| j|tjsuddw )Nr   r   zUnknown NVRTC compile methodr	   r   )r   r	   addNameExpressionrE  compileProgramgetLoweredNamer.   getProgramLogr   getCUBINgetPTXr4   
NVRTCErrorr   rF  rX   r   r   )r-  r   r8   kerr   r;   r   r   r   r     s2   



z_NVRTCProgram.compile)rD  r   r   Nr   )r   N)r   r   r   r,  r   rI  rJ  r   r   r   r   r   r     s    
r   c                 C   s   t d| d uS )Nz^[a-zA-Z_][a-zA-Z_0-9]*$)rematch)rX   r   r   r   is_valid_kernel_name  s   rU  c                 C   sX  ddgt | }t }tj|d}|d }|d }t|d}	|	|  W d    n1 s1w   Y  ||d|g7 }z	t||d|}
W n% t	yk } zt
t|| ||d}tdd	}|re|tj |d }~ww tj|szt	d
||
t|d}	|	 W  d    W  d    S 1 sw   Y  W d    d S 1 sw   Y  d S )Nr!   z--gencokern.cpp.hsacor   r   r   FzP`hipcc` command does not generate output file. 
command: {0}
stdout/stderr: 
{1}r   )r   r   r   r"   r:   rO   r   r.   r>   r   r   r6   r   r   r   r   r   r0   r   )r   r   r   r8   r1   r   r:   in_pathout_pathr)  r3   r<   r   r   r   r   r   compile_using_hipcc  sD   

"r[  c              	   C   s   ddgt | }t F}tj|d}d| }t|d}||  W d    n1 s-w   Y  || t	||d}t
|tsDJ tdd|W  d    S 1 sUw   Y  d S )Nr!   z--preprocessrV  z%s.cppr   z	(?m)^#.*$r   )r   r   r   r"   r:   rO   r   r.   r   r>   r   r6   rS  sub)r   r   r1   r   r:   r   r   pp_srcr   r   r   _preprocess_hipcc  s   

$r^  c              
   C   st   t dkrd}nd}t|}z	||\}}W n ty0 } ztdd}|r+|tj  d }~ww t|t	s8J |S )Nthz}
        // hiprtc segfaults if the input code is empty
        __global__ void _cupy_preprocess_dummy_kernel_() { }
        z
        // hiprtc segfaults if the input code is empty
        #include <hip/hip_runtime.h>
        __global__ void _cupy_preprocess_dummy_kernel_() { }
        r   F)
_cuda_hip_versionr   r   r   r   r   r   r   r   r   )r   r   coder   r   r   r<   r   r   r   r   _preprocess_hiprtc-  s"   rb  c                 C   s   |sd|  S t dkr| S t dkrd|  S td u r/|d ur/|d}dd |D }d| a}| d} dd | D } dt d|  } | S )	Nz#include <hip/hip_runtime.h>
r_  i  r   c                 S   s$   g | ]}| d s| ds|qS )#includez#pragma oncer   r|   rB  r   r   r   
<listcomp>\  s
    
z*_convert_to_hip_source.<locals>.<listcomp>c                 S   s   g | ]	}| d s|qS )rc  r   rd  r   r   r   re  b  s    z7#include <hip/hip_runtime.h>
#include <hip/hip_fp16.h>
)r`  _hip_extra_sourcer   rO   )r   r  	is_hiprtcr   r   r   _convert_to_hip_sourceL  s&   

rh  r   c
                 C   s  t |rtd|d7 }t }
|
dkr"|
dk r"|dt  d f7 }|d u r)t }|d u r2t j}|	r=t	| ||dkd} ||t
 |f}t|d }|d u ra|dkrXtd	|}ntd	|}|t|< d
||| |f }|d}t|d }t }|stj|stj|dd tj||}tj|r|st|d}| }W d    n1 sw   Y  t|tkr|d t }|td  }t|d}||kr|| |S n	 |dkrt| |||d |||\}}|| nt | |||}|sMt|d}t!j"|dd}|#| |#| |j$}W d    n	1 sw   Y  t%&|| t'ddrLt|d d}|#|  W d    n	1 sGw   Y  n	 || |S )Nz,separate compilation is not supported in HIP)z-fcuda-flush-denormals-to-zeroifi ir{   z/llvm/lib/clang/13.0.0/include/r   )rg  r   z%s %s %s %sr  rX  Tr  r   r	  r
  Fr  r  rW  r   )(r   r   r   get_build_versionr   r   r   r   r   rh  rd   r   r'   rb  r^  r  r   r   r  r"   r:   r  r  rO   rP   r   r   r   r  r  r   r  r[  r   r  r.   rX   r@   r  r   )r   r   r   r  r  r7   r   r8   r   use_converterrocm_build_versionr   r  r  rX   r  r:   r)  r!  
hash_valuebinarybinary_hashr   r'  r(  r   r   r   r   j  s   	







r   ra   )r   Nr   NNFF)r   Nr   r   FN)r   NNNr	   )Nr	   FNNFF)r   NNFT)Qr(   r   r7  r"   rL   rS  r@   r*   r   r   typingr   rQ   r   r   r   r   cupy_backends.cuda.apir   r   cupy_backends.cuda.libsr	   cupyr
   r   ri  r`  rb   r   r$   r   r   rI   r   r   r   r>   memoizer%   r6   rB   rC   rd   ri   r   rz   r   r   r   r   r   r   r   r   r   r   r   r   r  r   r   r   r:   
expanduserr   r   r   dict__annotations__r  r   r   objectr   rU  r[  r^  rb  rf  rh  r   r   r   r   r   <module>   s   
 
1









%
2
X# 
z'3
(