o
    W۷i]                     @  s  U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ e Zdae
jdadZdaG dd de Z!G dd de Z"G dd de Z#ddddZ$e% dd Z&deddZ'deddZ(dd Z)e% dd  Z*d!Z+e% d"d# Z,e% d$d% Z-ej%d&d'd(d) Z.ej%d&d'ddd*d+Z/d,d- Z0d.d/ Z1d0d1 Z2d2d3 Z3e3d4d5Z4d5a5d6d7 Z6d8d9 Z7e8e7d:Z9d;d< Z:	>		dfd?d@Z;	>		dfdAdBZ<		C	dgdDdEZ=dFdG Z>ej?@dHZAdIdJ ZBi aCdKeDdL< 		Mdhd5ddd5d5dNdOdPZE	M		5didQdRZFG dSdT dTe ZGG dUdV dVZHdWdX ZIdddYdZZJd[d\ ZKd]d^ ZLdaMd_d` ZN		5	&djdbdcZOdS )k    )annotationsN)device)function)get_rocm_path)driver)runtimenvrtc)_environment)_utilwin32)
--device-cz-dcz	-rdc=truez--relocatable-device-code=truec                   @     e Zd ZdS )NVCCExceptionN__name__
__module____qualname__ r   r   H/home/ubuntu/vllm_env/lib/python3.10/site-packages/cupy/cuda/compiler.pyr   "       r   c                   @  r   )HIPCCExceptionNr   r   r   r   r   r   &   r   r   c                   @  r   )JitifyExceptionNr   r   r   r   r   r   *   r   r   c           
   
   C  s  z>t j}tr"t }|d ur"|t j t jdd }t|}||d< tj	| ||tj
dtr/tjndd}|d ur<|| |W S  tjyj } zd||j|j|j}	|dkrZt|	|dkrbt|	t|	d }~w ty } zd	t| }	t|	|d }~ww )
NPATH Tr   )cwdenvstderruniversal_newlinescreationflagszZ`{}` command returns non-zero exit status. 
command: {}
return-code: {}
stdout/stderr: 
{}nvcchipccz>Failed to run `{0}` command. Check PATH environment variable: )osenviron_win32_get_extra_path_for_msvcpathsepgetcopydeepcopy
subprocesscheck_outputSTDOUTCREATE_NO_WINDOWwriteCalledProcessErrorformatcmd
returncodeoutputr   r   RuntimeErrorOSErrorstr)
r1   r   backend
log_streamr   
extra_pathpathlogemsgr   r   r   _run_cc.   sL   

r>   c                  C  s2   t d} | r	d S t }|r|S t }|r|S d S )Ncl.exe)shutilwhich_get_cl_exe_dir_get_cl_exe_dir_fallback)cl_exe
cl_exe_dirr   r   r   r%   _   s   
r%   return
str | Nonec               
   C  s   z=zdd l } W n ty   Y W d S w | jt j}|D ]}tj	|d}tj
|r2|  W S qtd|  W d S  ty] } ztdt| d|  W Y d }~d S d }~ww )Nr   r?   zcl.exe could not be found in z,Failed to find cl.exe with setuptools.msvc: : )setuptools.msvc	ExceptionmsvcEnvironmentInfoplatformmachineVCToolsr"   r:   joinexistswarningswarntype)
setuptoolsvctoolsr:   rD   r<   r   r   r   rB   q   s*   
rB   c               
   C  s   z&ddl m}  ddlm} || ddi}|  |j  tj	|jj
W S  tyF } ztdt| d|  W Y d }~d S d }~ww )Nr   )Distribution)	build_extnamecupy_cl_exe_discoverz'Failed to find cl.exe with setuptools: rH   )rU   rW   setuptools.command.build_extrX   setup_shlib_compilershlib_compiler
initializer"   r:   dirnameccrJ   rR   rS   rT   )rW   rX   extr<   r   r   r   rC      s   
rC   c                   C  s   t d u rt a t S N)_nvrtc_versionr	   
getVersionr   r   r   r   _get_nvrtc_version   s   re   c                  C  s   ddl m}  | jS )Nr   core)
cupy._corerg   CUPY_CACHE_KEYrf   r   r   r   _get_cupy_cache_key   s   rj   )3253627287c                  C  sB   t  \} }| dkr|dk rd}|S | dkr|dkrd}|S d}|S )N      90120121)re   )majorminornvrtc_max_compute_capabilityr   r   r   _get_max_compute_capability   s   
rx   c                  C  s$   t  \} }tdd t| |D S )Nc                 s  s    | ]}d | V  qdS )-INr   ).0dr   r   r   	<genexpr>   s
    
z._get_extra_include_dir_opts.<locals>.<genexpr>)re   tupler
   $_get_include_dir_from_conda_or_wheel)ru   rv   r   r   r   _get_extra_include_dir_opts   s   
r   T)for_each_devicec                  C  s*   t  } t j}|tv r|S t|| tdS )N)key)rx   r   Devicecompute_capability_tegra_archsminint)rw   archr   r   r   	_get_arch   s
   
r   c                 C  s@   | d u rt  } tst| tt krd|  dfS d|  dfS )Nz	-arch=sm_cubin-arch=compute_ptx)r   _use_ptxr   rx   r   r   r   r   _get_arch_for_options_for_nvrtc   s   r   c                 C     t dd | D S )Nc                 s  s    | ]	}|t v r|V  qd S rb   
_rdc_flagsrz   or   r   r   r|          z'_is_cudadevrt_needed.<locals>.<genexpr>)anyoptionsr   r   r   _is_cudadevrt_needed      r   c                  C  sv   t d urt S ddlm}  |  }|d u rtdtr|d7 }n|d }tj|s-|d7 }n|}tj|s9td|S )Nr   )get_cuda_pathzCUDA is not found.z/lib/x64/cudadevrt.libz/lib64/libcudadevrt.az/lib/libcudadevrt.az>Relocatable PTX code is requested, but cudadevrt is not found.)
_cudadevrt	cupy.cudar   r4   r$   r"   r:   isfile)r   	cudadevrtcudadevrt64r   r   r   _get_cudadevrt_path   s"   

r   c                 C  r   )Nc                 s  s    | ]	}|t vr|V  qd S rb   r   r   r   r   r   r|     r   z%_remove_rdc_option.<locals>.<genexpr>)r}   r   r   r   r   _remove_rdc_option  r   r   c                 C  sH   t j| }|d u st|dkr|S zt|dkW S  ty#   Y dS w )Nr      F)r"   r#   r'   lenr   
ValueError)rY   defaultvalr   r   r   _get_bool_env_variable  s   r   CUPY_COMPILE_WITH_PTXFc              
   C  s   ddl m} tsddlm} |  ||  da| }|d |  } z|| |\}}}}W n* tyY }	 zt	t
|	|||d}
tdd}|rN|
tj tt
|
|	d }	~	ww ||ks`J |||fS )	Nr   jitifyrf   T
r   CUPY_DUMP_CUDA_SOURCE_ON_ERRORF)r   r   #_jitify_header_source_map_populatedrh   rg   _init_module_add_sources_get_header_source_maprJ   CompileExceptionr6   r   dumpsysr   r   )sourcer   cu_pathr   rg   
old_sourcerY   headersinclude_namesr<   cexr   r   r   r   _jitify_prep  s,   

r   c                 C  s   t j| dd S )NF)usedforsecurity)hashlibsha1	hexdigest)valuer   r   r   _hash_hexdigest;  r   r       c                 C  s,   | rt jdtdd d S t jdtdd d S )Nzjitify=True is deprecated and its support is staged for removal in CuPy v15.0.
Please try compiling without jitify using the CCCL headers as needed.
Also see https://nvidia.github.io/cccl/python/ for e.g. Thrust/CUB algorithm exposure to Python.   )
stacklevelz}The jitify argument is deprecated and staged for removal in CuPy v15.0. Avoid passing `jitify=False` to silence this warning.)rR   rS   DeprecationWarningr   r   r   r   _jitify_deprecation_warningB  s   
	
r   r   kern.cuc	              	     s    fdd}	|s>t  )}
tj|
|}t|d}||  W d    n1 s)w   Y  W d    n1 s8w   Y  n|sBdn|}|	| ||||||S )Nc              
     s   |d ur|dks
J |d  f7 }nt js"t \}}||f7 }nd}|r0t| ||\}}}	nd }}	t \}
}|
dkrA|d7 }t| |||	||d}z|||\}}W ||fS  tyr } ztdd	}|rm|	t
j  d }~ww )
Nltor   r   r   rp   )z#--device-as-default-execution-space)name_expressionsmethodr   F)r   is_hipr   r   re   _NVRTCProgramcompiler   r   r   r   r   )r   r   r   r   r8   r   r   arch_optr   r   major_versionminor_versionprogcompiled_objmappingr<   r   r   r   r   _compileZ  s<   

z1_compile_using_nvrtc_no_warning.<locals>._compilewr   )tempfileTemporaryDirectoryr"   r:   rP   openr.   )r   r   r   filenamer   r8   cache_in_memoryr   r   r   root_dirr   cu_filer   r   r   _compile_using_nvrtc_no_warningT  s   $

r   c	           	   
   C  s(   |d urt | t| ||||||||	S rb   )r   r   )	r   r   r   r   r   r8   r   r   r   r   r   r   compile_using_nvrtc  s   r   r   c                 C  s.  ddl m} |st }|dvrtd|dkr|rJ dj|d}| }	|	 }
|
| t Z}|dd }t	j
||}d	| }d
||f }t|d}||  W d    n1 s`w   Y  |s|
d|  |
t|7 }
|
| z	t|
|d| W n ty } ztt|| ||d}tdd}|r|tj |d }~ww |
 }|d |d }|
t|d|f 7 }
|
| z	t|
|d| W n% ty } ztt|| ||d}tdd}|r|tj |d }~ww t|}|d|d|d f7 }|t| }
z	t|
|d| W n ty) } ztt|dd|d}|d }~ww |dkrQt|d}| W  d    W  d    S 1 sKw   Y  n+|dkrxt|d}| W  d    W  d    S 1 srw   Y  nJ |W d    d S W d    d S 1 sw   Y  d S )Nr   )get_nvcc_path)r   r   z,Invalid code_type %s. Should be cubin or ptxr   z'-gencode=arch=compute_{cc},code=sm_{cc})r`   .z%s.cuz%s.%sr   z--%sr    r   Fz--cubinz.o-oz--device-link.cubinr   rbr   )r   r   r   r   r0   splitappendr   r   r"   r:   rP   r   r.   listr>   r   r   r6   r   r   r   r   r(   r   read)r   r   r   r   	code_typeseparate_compilationr8   r   arch_str_nvccr1   r   
first_partr:   r   result_pathr   r<   r   r   cmd_partialobjptx_filebin_filer   r   r   compile_using_nvcc  s   




;
>>$r   c           	   
   C  s   |dkr4|d |f7 }t| }z	||\}}W nN ty3 } ztdd}|r.|tj  d }~ww |dkrbz|d }t| ||ddd	}W n  tya } ztdd}|r\|tj  d }~ww t	d
| t
|tsoJ ddd |  D S )Nr	   z-arch=compute_{}r   Fr    )r   zpreprocess.ptxzpreprocess.cur   )r   Invalid backend %sr   c                 s  s    | ]
}| d r|V  qdS )z//N
startswith)rz   xr   r   r   r|     s    

z_preprocess.<locals>.<genexpr>)r0   r   r   r   r   r   r   r   r   r   
isinstancebytesrP   decode
splitlines)	r   r   r   r7   r   result_r<   r   r   r   r   _preprocess  sD   



r   z~/.cupy/kernel_cachec                   C  s   t jdtS )NCUPY_CACHE_DIR)r"   r#   r'   _default_cache_dirr   r   r   r   get_cache_dir  s   r   dict_empty_file_preprocess_cacher	   )enable_cooperative_groupsr   r8   r   to_ltoirc                C  s   |r	t jr	td|d ur|dkrttddo|dk}t jr3|dkr%dnd}t| ||||||||	S t| ||||||||||	|
S )Nz+Cooperative groups is not supported in HIP.r	   CUPY_CACHE_IN_MEMORYFhiprtcr!   )r   r   r   NotImplementedErrorr   _compile_with_cache_hip_compile_with_cache_cuda)r   r   r   	cache_dirextra_sourcer7   r  r   r8   r   r  r   r   r   r   _compile_module_with_cache  s*   
r  c           !   
   C  s  |d u rt  }|d u rt }|d7 }|r|d7 }|r|d7 }tddr'|d7 }d|v }|
r4|s4|d7 }n|r:|
s:d	}
|
rD|d
krDtd|t 7 }||t |ft| }t|d }|d u rit	d|||}|t|< d||| |t
 f }|d}t||rdnd }|st }|	stj|stj|d	d tj||}tj|r|st|d}| }W d    n1 sw   Y  t|tkr|d t }|td  }t|d}||kr|r|S || |S n	 |d
kr3|	rdn|d }t| ||||||	|
|rdnd 	\}}t|r(|s(t }||d t }|| |  }n|}|s2|!| n"|dkrO|r=t"t|}t#| |||d d||d}ntd| |	st|d}t$j%|dd}|&| |&| |j'}W d    n	1 sw   Y  zt(|| W n
 t)y   Y nw tddrt|d d} | &|  W d    n	1 sw   Y  n	 |r|S || |S )N)z	-ftz=true)z-dlto)r   CUPY_CUDA_COMPILE_WITH_DEBUGF)z--device-debugz--generate-line-info-DCUPY_USE_JITIFY)r  Tr	   zjitify only works with NVRTCr   z%s %s %s %s %sutf-8z.ltoirr   exist_okr   ascii.cur   zcupy.ptxr    r   )r   r   r8   r   dirdeleteCUPY_CACHE_SAVE_CUDA_SOURCEr   )*r   r   r   r   r   re   r   r  r'   r   rj   encoder   r   Moduler"   r:   isdirmakedirsrP   rQ   r   r   r   _hash_lengthloadr   r   	LinkStateadd_ptr_datar   add_ptr_filecomplete_set_mappingr  r   r   NamedTemporaryFiler.   rY   replacePermissionError)!r   r   r   r	  r
  r7   r  r   r8   r   r   r  is_jitify_requestedr   basekey_srcrY   modr:   filedatahashr   
cubin_hashcu_namer   r   lsr   rdctf	temp_pathfr   r   r   r  <  s   














r  c                      sF   e Zd Zd fdd	Zdd Zdd Zdd	 Zd
d Zdd Z  Z	S )r   r	   c                   s,   || _ || _|| _|| _|| _t   d S rb   )_msgr   rY   r   r7   super__init__)selfr=   r   rY   r   r7   	__class__r   r   r5    s   zCompileException.__init__c                 C  s    t | | j| j| j| j| jffS rb   )rT   r3  r   rY   r   r7   r6  r   r   r   
__reduce__  s   zCompileException.__reduce__c                 C  s   t | S rb   )r6   r9  r   r   r   __repr__     zCompileException.__repr__c                 C  s   |   S rb   )get_messager9  r   r   r   __str__  r<  zCompileException.__str__c                 C  s   | j S rb   )r3  r9  r   r   r   r=    s   zCompileException.get_messagec                 C  s   | j d}tttt|d }d|}|d| j	
  |d|  |d |d| j |dd	| j |d
 t|D ]\}}|||d |  d  qO|d |  d S )Nr   r   z
{{:0{}d}} z{} zcompilation error: {}
z-----
z	Name: {}
zOptions: {}
 zCUDA source:
)r   r   r   mathfloorlog10r   r0   r.   r7   upperrY   rP   r   	enumeraterstripflush)r6  r2  linesdigits	linum_fmtiliner   r   r   r     s   


"
zCompileException.dumpr   )
r   r   r   r5  r:  r;  r>  r=  r   __classcell__r   r   r7  r   r     s    r   c                   @  s2   e Zd Z		dddZejfddZdd	d
ZdS )r   default_programr   Nr   c                 C  s\   d | _ t|tr|d}t|tr|d}|| _|| _t||||| _ || _|| _	d S )NzUTF-8)
ptrr   r   r   srcrY   r	   createProgramr   r   )r6  rO  rY   r   r   r   r   r   r   r   r5    s   




z_NVRTCProgram.__init__c                 C  s$   | rd S | j rt| j  d S d S rb   )rN  r	   destroyProgram)r6  is_shutting_downr   r   r   __del__  s
   z_NVRTCProgram.__del__c              	   C  s  zh| j r| j D ]	}t| j| qt| j| d }| j r.i }| j D ]}t| j|||< q"|d ur;|t| j | jdkrIt	| j|fW S | jdkrWt
| j|fW S | jdkret| j|fW S td tjy   t| j}t|| j| j|tjsddw )Nr   r   r   zUnknown NVRTC compile methodr	   r  )r   r	   addNameExpressionrN  compileProgramgetLoweredNamer.   getProgramLogr   getCUBINgetPTXgetLTOIRr4   
NVRTCErrorr   rO  rY   r   r   )r6  r   r8   kerr   r;   r   r   r   r     s6   




z_NVRTCProgram.compile)rM  r   r   Nr   )r   N)r   r   r   r5  r   rR  rS  r   r   r   r   r   r     s    
r   c                 C  s   t d| d uS )Nz^[a-zA-Z_][a-zA-Z_0-9]*$)rematch)rY   r   r   r   is_valid_kernel_name)  s   r_  c                 C  sX  ddgt | }t }tj|d}|d }|d }t|d}	|	|  W d    n1 s1w   Y  ||d|g7 }z	t||d|}
W n% t	yk } zt
t|| ||d}tdd	}|re|tj |d }~ww tj|szt	d
||
t|d}	|	 W  d    W  d    S 1 sw   Y  W d    d S 1 sw   Y  d S )Nr!   z--gencokern.cpp.hsacor   r   r   FzN`hipcc` command does not generate output file. 
command: {}
stdout/stderr: 
{}r   )r   r   r   r"   r:   rP   r   r.   r>   r   r   r6   r   r   r   r   r   r0   r   )r   r   r   r8   r1   r   r:   in_pathout_pathr2  r3   r<   r   r   r   r   r   compile_using_hipcc-  sD   

"re  c              	   C  s   ddgt | }t F}tj|d}d| }t|d}||  W d    n1 s-w   Y  || t	||d}t
|tsDJ tdd|W  d    S 1 sUw   Y  d S )Nr!   z--preprocessr`  z%s.cppr   z	(?m)^#.*$r   )r   r   r   r"   r:   rP   r   r.   r   r>   r   r6   r]  sub)r   r   r1   r   r:   r   r   pp_srcr   r   r   _preprocess_hipccU  s   

$rh  c              
   C  st   t dkrd}nd}t|}z	||\}}W n ty0 } ztdd}|r+|tj  d }~ww t|t	s8J |S )Nthz}
        // hiprtc segfaults if the input code is empty
        __global__ void _cupy_preprocess_dummy_kernel_() { }
        z
        // hiprtc segfaults if the input code is empty
        #include <hip/hip_runtime.h>
        __global__ void _cupy_preprocess_dummy_kernel_() { }
        r   F)
_cuda_hip_versionr   r   r   r   r   r   r   r   r   )r   r   coder   r   r   r<   r   r   r   r   _preprocess_hiprtcd  s"   rl  c                 C  s   |sd|  S t dkr| S t dkrd|  S td u r/|d ur/|d}dd |D }d| a}| d} dd | D } dt d|  } | S )	Nz#include <hip/hip_runtime.h>
ri  i  r   c                 S  s$   g | ]}| d s| ds|qS )#includez#pragma oncer   rz   rK  r   r   r   
<listcomp>  s
    
z*_convert_to_hip_source.<locals>.<listcomp>c                 S  s   g | ]	}| d s|qS )rm  r   rn  r   r   r   ro    s    z7#include <hip/hip_runtime.h>
#include <hip/hip_fp16.h>
)rj  _hip_extra_sourcer   rP   )r   r
  	is_hiprtcr   r   r   _convert_to_hip_source  s&   

rr  r  c
                 C  s  t |rtd|d7 }t }
|
dkr"|
dk r"|dt  d f7 }|d u r)t }|d u r2t j}|	r=t	| ||dkd} ||t
 |f}t|d }|d u ra|dkrXtd	|}ntd	|}|t|< d
||| |f }|d}t|d }t }|stj|stj|dd tj||}tj|r|st|d}| }W d    n1 sw   Y  t|tkr|d t }|td  }t|d}||kr|| |S n	 |dkrt| |||d |||\}}|| nt | |||}|sZt|d}t!j"|dd}|#| |#| |j$}W d    n	1 sw   Y  zt%|| W n
 t&y5   Y nw t'ddrYt|d d}|#|  W d    n	1 sTw   Y  n	 || |S )Nz,separate compilation is not supported in HIP)z-fcuda-flush-denormals-to-zeroifi iry   z/llvm/lib/clang/13.0.0/include/r  )rq  r   z%s %s %s %sr  rb  Tr  r   r  r  Fr  r  ra  r   )(r   r   r   get_build_versionr   r   r   r   r   rr  re   r  r'   rl  rh  r  r   r   r  r"   r:   r  r  rP   rQ   r   r   r   r  r  r   r!  re  r   r"  r.   rY   r#  r$  r   )r   r   r   r	  r
  r7   r   r8   r   use_converterrocm_build_versionr   r&  r'  rY   r(  r:   r2  r*  
hash_valuebinarybinary_hashr   r0  r1  r   r   r   r    s   	







r  rb   )rF   rG   )r   Nr   NNFNN)r   Nr   r   FN)r   NNNr	   )Nr	   FNNFFF)r  NNFT)P
__future__r   r(   r   r@  r"   rM   r]  r@   r*   r   r   rR   r   r   r   r   cupy_backends.cuda.apir   r   cupy_backends.cuda.libsr	   cupyr
   r   rs  rj  rc   r   r$   r   r   rJ   r   r   r   r>   memoizer%   rB   rC   re   rj   r   rx   r   r   r   r   r   r   r   r   r   r   r   r   r  r   r   r   r   r   r:   
expanduserr   r   r  __annotations__r  r  r   r   r_  re  rh  rl  rp  rr  r  r   r   r   r   <module>   s    
1











%
8

X# 
 '3
(