o
    Ti                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlZd dlZd dl	Z	d dl
Z
d dlZd dlZd dlZd dlmZmZ d dlmZmZ d dlmZ dZdZe de Zd	Zd
Zzd dlZW n eyq   ee d Y nw eej !dd  Z"eej !dd Z#G dd de$Z%G dd de$Z&d#ddZ'dd Z(g dg dg ddZ)d#ddZ*G dd deZ+G dd  d e+Z,G d!d" d"e,Z-dS )$    N)Path)CompileError	LinkError)ABCabstractmethod)Listz[93mz[0mz [WARNING] z/tmp/torch_extensionsz6.0;6.1;7.0zX unable to import torch, please install it if you want to pre-compile any deepspeed ops..   c                   @      e Zd ZdS )MissingCUDAExceptionN__name__
__module____qualname__ r   r   T/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/ops/op_builder/builder.pyr   '       r   c                   @   r
   )CUDAMismatchExceptionNr   r   r   r   r   r   +   r   r    c           	      C   s   dd l }|jjj}|d u rtdtj|d dgdd}| }|d}||d  	d	d
d}|d d \}}t
|t
|fS )Nr   z6CUDA_HOME does not exist, unable to compile CUDA op(s)z	/bin/nvccz-VT)universal_newlinesreleaser	   ,r   r      )torch.utils.cpp_extensionutilscpp_extension	CUDA_HOMEr   
subprocesscheck_outputsplitindexreplaceint)	nametorch	cuda_homeoutputoutput_splitrelease_idxr   
cuda_major
cuda_minorr   r   r   installed_cuda_version/   s   

r+   c                  C   s   t } dd l}|jjjd urMt d dkr7t d dkr| d7 } t d dkr*| d7 } t d dkr5| d7 } | S t d dkrM| d	7 } t d dkrM| d
7 } | S )Nr      r	   z;8.0z;8.6   z;9.0   z;8.0;8.6;9.0z
;10.0;12.0)DEFAULT_COMPUTE_CAPABILITIESr   r   r   r   r+   )compute_capsr$   r   r   r    get_default_compute_capabilities>   s    r1   )z10.0z10.1z10.2)	z11.0z11.1z11.2z11.3z11.4z11.5z11.6z11.7z11.8)z12.0z12.1z12.2z12.3z12.4z12.5z12.6z12.8)
   r,   r.   c                 C   s   t | \}}| d| }dtjjdd d }||krd|tv r>|t| v r>|t| v r>td| dtjj d dS t	ddd	krWtt
 d
| dtjj d dS td| dtjj ddS )Nr   r   zInstalled CUDA version z4 does not match the version torch was compiled with z> but since the APIs are compatible, accepting this combinationTDS_SKIP_CUDA_CHECK01z. DeepSpeed Op Builder: Installed CUDA version zn.Detected `DS_SKIP_CUDA_CHECK=1`: Allowing this combination of CUDA, but it may result in unexpected behavior.z0>- DeepSpeed Op Builder: Installed CUDA version zH, unable to compile cuda/cpp extensions without a matching cuda version.)r+   joinr$   versioncudar   cuda_minor_mismatch_okprintosgetenvWARNINGr   )r#   r)   r*   sys_cuda_versiontorch_cuda_versionr   r   r   assert_no_cuda_mismatch[   s.   

r@   c                       sD  e Zd ZdZdZdZdZdZi Zdd Z	e
dd Ze
dd Zdd	 Zd
d Zedd Zedd Zedd Zedd Zedd Zedd Zedd Zdd Zdd Zdd Zd>d!d"Zd#d$ Zd?d%d&Zd'd( Zd)d* Zd+d, Zd-d. Z d/d0 Z!d1d2 Z"d3d4 Z#d5d6 Z$d7d8 Z%d@ fd:d;	Z&d@ fd<d=	Z'  Z(S )A	OpBuilderNc                 C   s"   || _ d| _d| _d| _d | _d S NF)r#   jit_modebuild_for_cpuenable_bf16	error_log)selfr#   r   r   r   __init__}   s
   
zOpBuilder.__init__c                 C      dS )z
        Returns absolute build path for cases where the op is pre-installed, e.g., deepspeed.ops.adam.cpu_adam
        will be installed as something like: deepspeed/ops/adam/cpu_adam.so
        Nr   rG   r   r   r   absolute_name   s   zOpBuilder.absolute_namec                 C   rI   )z}
        Returns list of source files for your op, relative to root of deepspeed package (i.e., DeepSpeed/deepspeed)
        Nr   rJ   r   r   r   sources   s   zOpBuilder.sourcesc                 C      d S Nr   rJ   r   r   r   hipify_extension      zOpBuilder.hipify_extensionc                 C   rM   rN   r   rJ   r   r   r   sycl_extension   rP   zOpBuilder.sycl_extensionc                 C   sB   | d }d tjdd d }||krtd| d| d S )Nr7   r   r   zPyTorch version mismatch! DeepSpeed ops were compiled and installed with a different version than what is being used at runtime. Please re-install DeepSpeed or switch torch versions. Install torch version=z, Runtime torch version=)r6   r$   __version__r   RuntimeError)
torch_infoinstall_torch_versioncurrent_torch_versionr   r   r   validate_torch_version   s   z OpBuilder.validate_torch_versionc                 C   s   t  s&dtjjdd d }| d }||kr$td| d| d S dtjjdd d }| d }||krFtd| d| d S )	Nr   r   cuda_versionzCUDA version mismatch! DeepSpeed ops were compiled and installed with a different version than what is being used at runtime. Please re-install DeepSpeed or switch torch versions. Install CUDA version=z, Runtime CUDA version=hip_versionzHIP version mismatch! DeepSpeed ops were compiled and installed with a different version than what is being used at runtime. Please re-install DeepSpeed or switch torch versions. Install HIP version=z, Runtime HIP version=)	rA   is_rocm_pytorchr6   r$   r7   r8   r   rS   hip)rT   current_cuda_versioninstall_cuda_versioncurrent_hip_versioninstall_hip_versionr   r   r   validate_torch_op_version   s&   z#OpBuilder.validate_torch_op_versionc                  C   s   t jd urt jS d} zdd l}W n	 ty   Y n%w tdks&tdkr>tdkr>t|jdo1|jjd u} | r>ddl	m
} |d u} | t _t jS )NFr   r	      r[   	ROCM_HOME)rA   _is_rocm_pytorchr$   ImportErrorTORCH_MAJORTORCH_MINORhasattrr7   r[   r   rc   )rd   r$   rc   r   r   r   rZ      s   
zOpBuilder.is_rocm_pytorchc                  C   sF   t jd urt jS d} ztjddgdd}W n   Y nd} | t _t jS )NFc2sz	--versionT)capture_output)rA   _is_sycl_enabledr   run)rk   resultr   r   r   is_sycl_enabled   s   
zOpBuilder.is_sycl_enabledc                  C   sX  t jrt jS d} d}d}t  rddlm} t|d}| r;t|d}|	 }W d    n1 s5w   Y  ndt
jv rHt
jdd }|dkr[|d	d } |d	d }n=td
}| rt|d(}| D ]}d|v r|td|d } qmd|v rtd|d }qmW d    n1 sw   Y  | dkrJ dt| t|ft _t jS )Nr4   r   r   rb   z.info/versionrrocmr	   r   z/usr/include/rocm_version.hz#define ROCM_VERSION_MAJORz\S+r   z#define ROCM_VERSION_MINORFzCould not detect ROCm version)rA   _rocm_versionrZ   r   rc   r   joinpathis_fileopenreadr$   rR   r   	readlinesrefindallr"   )
ROCM_MAJOR
ROCM_MINORROCM_VERSION_DEV_RAWrc   rocm_ver_filefilelnr   r   r   installed_rocm_version   sB   

z OpBuilder.installed_rocm_versionc                  C   v   t jrt jS td} |  std} t| d }ztj|dd}|d }W n tj	y4   d}Y nw |t _t jS )N/opt/rocm/bin/rocminforocminfoz | grep -o -m 1 'gfx.*'Tshellutf-8r   )
rA   _rocm_gpu_archr   rs   strr   r   decodestripCalledProcessError)	rocm_inforocm_gpu_arch_cmdrm   rocm_gpu_archr   r   r   get_rocm_gpu_arch   s   zOpBuilder.get_rocm_gpu_archc                  C   r   )Nr   r   zG | grep -Eo -m1 'Wavefront Size:[[:space:]]+[0-9]+' | grep -Eo '[0-9]+'Tr   r   32)
rA   _rocm_wavefront_sizer   rs   r   r   r   r   r   r   )r   rocm_wavefront_size_cmdrm   rocm_wavefront_sizer   r   r   get_rocm_wavefront_size  s$   z!OpBuilder.get_rocm_wavefront_sizec                 C      g S )zr
        Returns list of include paths, relative to root of deepspeed package (i.e., DeepSpeed/deepspeed)
        r   rJ   r   r   r   include_paths!     zOpBuilder.include_pathsc                 C   r   )zg
        Returns optional list of compiler flags to forward to nvcc when building CUDA sources
        r   rJ   r   r   r   	nvcc_args'  r   zOpBuilder.nvcc_argsc                 C   r   )zQ
        Returns optional list of compiler flags to forward to the build
        r   rJ   r   r   r   cxx_args-  r   zOpBuilder.cxx_argsFc                 C   rI   )zU
        Check if all non-python dependencies are satisfied to build this op
        Tr   rG   verboser   r   r   is_compatible3  r   zOpBuilder.is_compatiblec                 C   r   rN   r   rJ   r   r   r   extra_ldflags9  rP   zOpBuilder.extra_ldflagsc                 C   s   d}d}d}zUz|rt jd t jj|d}t j| t }d||f }	t	j
|d}
t|
d}||	 W d   n1 sDw   Y  |shtt	j
|dd}t	tj }t	| tj  t	j
|
\}}t	j
|rz|d nd	}t	j
||}tt	jd
d	}|j|
g|| |d}tt	jdd	}|j|t	j
|d| |||d W W |durt	|tj  |dur|  |durt| dS dS  ty   Y W |durt	|tj  |dur|  |durt| dS dS  t y3   Y W |durt	|tj  |dur%|  |dur1t| dS dS    Y W |durFt	|tj  |durO|  |dur[t| dS dS |durkt	|tj  |durt|  |durt| w w )a  
        Test for existence of a function within a tuple of libraries.

        This is used as a smoke test to check whether a certain library is available.
        As a test, this creates a simple C program that calls the specified function,
        and then distutils is used to compile that program and link it with the specified libraries.
        Returns True if both the compile and link are successful, False otherwise.
        Nr	   )r   zBvoid %s(void); int main(int argc, char** argv) { %s(); return 0; }ztest.cwz
stderr.txtr   r   CFLAGS)
output_dirextra_preargsLDFLAGSza.out)r   	librarieslibrary_dirsTF)!	distutilslogset_verbosity	ccompilernew_compiler	sysconfigcustomize_compilertempfilemkdtempr;   pathr6   rt   writedupsysstderrfilenodup2
splitdriveisabsshlexr   environgetcompilestrip_empty_entrieslink_executablecloseshutilrmtreer   r   )rG   funcnamer   r   r   tempdir
filestderr	oldstderrcompilerprogfilenamefdrivedriveless_filenameroot_dirr   cflagsobjsldflagsr   r   r   has_function<  s   	










zOpBuilder.has_functionc                 C      dd |D S )zP
        Drop any empty strings from the list of compile and link flags
        c                 S   s   g | ]
}t |d kr|qS )r   )len.0xr   r   r   
<listcomp>  s    z1OpBuilder.strip_empty_entries.<locals>.<listcomp>r   )rG   argsr   r   r   r     s   zOpBuilder.strip_empty_entriesc                 C   s   zddl m} W n! ty) } z|  }|d u rW Y d }~dS W Y d }~nd }~ww z| }W n2 tya } z&| | j dt| d| d |  }|d u rWW Y d }~dS W Y d }~nd }~ww |d drkd	S dS )
Nr   get_cpu_infoz-march=native9 attempted to use py-cpuinfo but failed (exception type: , 1), falling back to lscpu to get this information.archPPC_z-mcpu=native)	cpuinfor   re   _backup_cpuinfo	Exceptionwarningr#   type
startswithrG   r   ecpu_infor   r   r   cpu_arch  s,   
"zOpBuilder.cpu_archc                 C   sJ   z|   st| j W dS W dS  ty$   tt d| j d Y dS w )Nz-D__ENABLE_CUDA__ zW cuda is missing or is incompatible with installed torch, only cpu ops can be compiled!z-D__DISABLE_CUDA__)rZ   r@   r#   r   r:   r=   rJ   r   r   r   get_cuda_compile_flag  s   
zOpBuilder.get_cuda_compile_flagc                 C   s   |  ds| | j d d S tdg}|d  }i }d |d< d|d< d|v s1d|v r\d	|d< d
|v rB|d  d7  < nd|v rN|d  d7  < d|v rZ|d  d7  < |S d|v rdd|d< |S )Nlscpuz attempted to query 'lscpu' after failing to use py-cpuinfo to detect the CPU architecture. 'lscpu' does not appear to exist on your system, will fall back to use -march=native and non-vectorized execution.r   r   r   flagsgenuineintelauthenticamdX86_64avx512zavx512,avx512fzavx512f,avx2ppc64ler   )command_existsr   r#   r   r   r   r   lower)rG   rm   r   r   r   r   r     s(   
zOpBuilder._backup_cpuinfoc                 C   s   zddl m} W n! ty) } z|  }|d u rW Y d }~dS W Y d }~nd }~ww z| }W n2 tya } z&| | j dt| d| d |  }|d u rWW Y d }~dS W Y d }~nd }~ww |d dkr~d	|d
 v std|d
 v rvdS d|d
 v r~dS dS )Nr   r   z-D__SCALAR__r   r   r   r   r   r   r   r   z-D__AVX512__r   z-D__AVX256__)r   r   re   r   r   r   r#   r   r   r   r   r   
simd_width  s2   
"zOpBuilder.simd_widthc                 C   s   d|v r
| d}n|g}d}|D ]}ddd| g}tj|tjd}|p*| dk}q|sDt|dkrDtt d	| j d
| d |S |sZt|dkrZtt d	| j d| d |S )N|Fbashz-cztype )stdoutr   r	   r   z) requires one of the following commands 'z', but it does not exist!z requires the 'z!' command, but it does not exist!)	r   r   PopenPIPEwaitr   r:   r=   r#   )rG   cmdcmdsvalidsafe_cmdrm   r   r   r   r     s   zOpBuilder.command_existsc                 C   s   | | _ tt d|  d S )Nr   )rF   r:   r=   )rG   msgr   r   r   r     s   zOpBuilder.warningc                 C   s*   t j|r|S t jttjj |S rN   )r;   r   r   r6   r   __file__parentabsolute)rG   	code_pathr   r   r   deepspeed_src_path  s   zOpBuilder.deepspeed_src_pathc              	   C   s\   ddl m} dd | |  D }||  | |  |d| |  i| |  dS )Nr   CppExtensionc                 S      g | ]}t j|qS r   r;   r   abspathr   r   r   r   r         z%OpBuilder.builder.<locals>.<listcomp>cxx)r#   rL   include_dirsextra_compile_argsextra_link_args)r   r  r   r   rK   rL   r   r   )rG   r  r  r   r   r   builder  s   zOpBuilder.builderTc                    s   | j  jv r j| j  S ddlm}m}m} ddlm} || j drL|| j	krL| 
| tj r=t| tr=| | t|  }| j| j < |S | |S )Nr   )installed_opsrT   accelerator_name)get_acceleratorF)r#   _loaded_opsdeepspeed.git_version_infor  rT   r  deepspeed.acceleratorr  r   _namerW   r$   r8   is_available
isinstanceCUDAOpBuilderr`   	importlibimport_modulerK   jit_load)rG   r   r  rT   r  r  	op_module	__class__r   r   load  s   


zOpBuilder.loadc              
      s    |std j d j zdd l}W n ty'   td j dw t tr8  s8t	j
   _d _ddlm} t } fdd  D } fd	d  D }d }d
tjv rmtjd
}dtjd
<    }   }	t tr js jr|	d |d |d |d |d   r|	d   tjd< |	d    | j | ||	|   t trՈ jsdnd |d}
t | }|rtd j d| d |r|tjd
< |
j j< |
S )NzUnable to JIT load the zC op due to it not being compatible due to hardware/software issue. r   z% op due to ninja not being installed.T)r  c                       g | ]}t j |qS r   r;   r   r  r  r   r   rJ   r   r   r   /      z&OpBuilder.jit_load.<locals>.<listcomp>c                    r   r   r!  r"  rJ   r   r   r   0  r#  TORCH_CUDA_ARCH_LISTr   -DBF16_AVAILABLEz -U__CUDA_NO_BFLOAT16_OPERATORS__z!-U__CUDA_NO_BFLOAT162_OPERATORS__z"-U__CUDA_NO_BFLOAT16_CONVERSIONS__-D__HIP_PLATFORM_AMD__=1PYTORCH_ROCM_ARCH-DROCM_WAVEFRONT_SIZE=%s)r#   rL   extra_include_pathsextra_cflagsextra_cuda_cflagsr   	with_cudar   zTime to load z op: z seconds) r   rS   r#   rF   ninjare   r  r  rZ   r$   r8   r  rD   rC   r   r  timerL   r   r;   r   r   r   r   r   rE   appendr   r   r   r:   r  )rG   r   r-  r  start_buildrL   r)  torch_arch_listr   r   r  build_durationr  rJ   r   r    sb   









	
zOpBuilder.jit_loadFrB   )T))r   r   r   rq   r   r   rd   rk   r  rH   r   rK   rL   rO   rQ   staticmethodrW   r`   rZ   rn   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  __classcell__r   r   r  r   rA   u   sZ    







"



R	rA   c                       sj   e Zd ZdddZdee fddZdd Zd fd
d	Zdd Z	dd Z
dd Zdd Zdd Z  ZS )r  Nc           
      C   sn  g }| j r4ttj D ]}tj|\}}| d| }||vr&|| qt|}|d  d7  < n.tj	
dd}|durV|durOtt d| d|  |dd	}n|du r]t }|d	}| |}t|d
krvtd| j dg }d| _|D ]7}|d
 |d dd
  }	|d|	 d|	  |d dr|d|	 d|	  t|d
 dkrd| _q}|S )a_  
        Returns nvcc compute capability compile flags.

        1. `TORCH_CUDA_ARCH_LIST` takes priority over `cross_compile_archs`.
        2. If neither is set default compute capabilities will be used
        3. Under `jit_mode` compute capabilities of all visible cards will be used plus PTX

        Format:

        - `TORCH_CUDA_ARCH_LIST` may use ; or whitespace separators. Examples:

        TORCH_CUDA_ARCH_LIST="6.1;7.5;8.6;9.0;10.0" pip install ...
        TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6 9.0 10.0+PTX" pip install ...

        - `cross_compile_archs` uses ; separator.

        r   z+PTXr$  Nz env var TORCH_CUDA_ARCH_LIST=z overrides cross_compile_archs=r   ;r   zUnable to load z< op due to no compute capabilities remaining after filteringTr	   +z-gencode=arch=compute_z	,code=sm_z,code=compute_   F)rC   ranger$   r8   device_countget_device_capabilityr/  sortedr;   r   r   r:   r=   r!   r1   r   
filter_ccsr   rS   r#   rE   endswithr"   )
rG   cross_compile_archsccsiCC_MAJORCC_MINORcccross_compile_archs_envr   numr   r   r   compute_capability_argsc  sH   


z%CUDAOpBuilder.compute_capability_argsrA  c                 C   r   )z
        Prune any compute capabilities that are not compatible with the builder. Should log
        which CCs have been pruned.
        c                 S   s   g | ]}| d qS )r   )r   )r   rE  r   r   r   r     s    z,CUDAOpBuilder.filter_ccs.<locals>.<listcomp>r   )rG   rA  r   r   r   r>    s   zCUDAOpBuilder.filter_ccsc                 C   sr   g }t dkst dkrtdkrdg}g }t dkst dkr"tdkr"dg}g }t dks0t dkr3tdkr3dg}|| | S )Nr	   r   z-DVERSION_GE_1_1r   z-DVERSION_GE_1_3   z-DVERSION_GE_1_5)rf   rg   )rG   version_ge_1_1version_ge_1_3version_ge_1_5r   r   r   version_dependent_macros  s   z&CUDAOpBuilder.version_dependent_macrosFc                    s   t  |S rN   )superr   r   r  r   r   r     s   zCUDAOpBuilder.is_compatiblec           	   
   C   s  z|   s
t| j d| _W n ty   d| _Y nw | jr%ddlm} nddlm} dd | | 	 D }| jrCd| | 
 in| | 
 | |  d	}| jsf| jrf|d d
 |d d
 |   r|d d |d d|    |d d|    |  r|  tjd< ||  | |  || |  || |  d}|   r|j}ttjj}tt|D ]}t|| }| rt||||< qt|||< q||_|S )NFTr   r  )CUDAExtensionc                 S   r  r   r  r   r   r   r   r     r	  z)CUDAOpBuilder.builder.<locals>.<listcomp>r
  )r
  nvccr%  rP  r&  r(  r'  )r#   rL   r  r   r  r  )rZ   r@   r#   rD   r   r   r  rO  r   r   r   r   rE   r/  r   r   r;   r   rK   rL   libraries_argsr   r   r   r   r:  r   is_absoluter   relative_to)	rG   ExtensionBuilderr  compile_argscuda_extrL   	curr_filerB  srcr   r   r   r    sT   


zCUDAOpBuilder.builderc              
   C   s`   |   r.ddlm} |jt t |  tjt dgdd | 	 D dddd d S d S )Nr   )hipify_python*c                 S   r  r   r  )r   sr   r   r   r     r	  z2CUDAOpBuilder.hipify_extension.<locals>.<listcomp>T)project_directoryoutput_directoryheader_include_dirsincludesextra_filesshow_detailedis_pytorch_extensionhipify_extra_files_only)
rZ   torch.utils.hipifyrY  hipifyr;   getcwdr   r   r6   rL   )rG   rY  r   r   r   rO     s   
zCUDAOpBuilder.hipify_extensionc                 C   s   t jdkrdgS g dS )Nwin32z-O2)-O3
-std=c++17-gz-Wno-reorder)r   platformrJ   r   r   r   r     s   
zCUDAOpBuilder.cxx_argsc              	   C   s  | j rg S dg}|  r"|  \}}|ddddd| d| g7 }|S zttdd	}|d
kr3td	W n tyD   tt d}Y nw t	 \}}|dkr\|dkrY|dkrYd}nd}nd}|t
jdkrfdnd	d|dddd| g7 }tjdddkr|d ||  7 }|S )Nrh  ri  z-U__HIP_NO_HALF_OPERATORS__z-U__HIP_NO_HALF_CONVERSIONS__z-U__HIP_NO_HALF2_OPERATORS__z-DROCM_VERSION_MAJOR=%sz-DROCM_VERSION_MINOR=%sDS_NVCC_THREADSr   r   r-   r2   r.   ra   z
-std=c++20z
-std=c++14rg  z-allow-unsupported-compilerz--use_fast_mathz-U__CUDA_NO_HALF_OPERATORS__z-U__CUDA_NO_HALF_CONVERSIONS__z-U__CUDA_NO_HALF2_OPERATORS__z
--threads=DS_DEBUG_CUDA_BUILDr4   r5   z--ptxas-options=-v)rD   rZ   r   r"   r;   r<   
ValueErrormin	cpu_countr+   r   rk  r   r   r/  rH  )rG   r   ry   rz   nvcc_threadsr)   r*   std_libr   r   r   r     sF   

zCUDAOpBuilder.nvcc_argsc                 C   s    | j rg S tjdkrddgS g S )Nrg  cublascurand)rD   r   rk  rJ   r   r   r   rQ  #  s
   
zCUDAOpBuilder.libraries_argsrN   r3  )r   r   r   rH  r   r   r>  rM  r   r  rO   r   r   rQ  r5  r   r   r  r   r  a  s    
=2&r  c                       s,   e Zd Zdd Zdd Z fddZ  ZS )TorchCPUOpBuilderc                 C   s`   dd l }|  s$tj|jjjd}tj|s"tj|jjjd}|S tj|jjj	d}|S )Nr   lib64lib)
r$   rZ   r;   r   r6   r   r   r   existsrc   )rG   r$   
CUDA_LIB64r   r   r   get_cuda_lib64_path/  s   z%TorchCPUOpBuilder.get_cuda_lib64_pathc                 C   s<   | j rdgS |  sdg}| j s|d|    |S g S )N-fopenmpz-lcurand-L)rD   rZ   r/  rz  )rG   ld_flagsr   r   r   r   9  s   zTorchCPUOpBuilder.extra_ldflagsc                    sb   g }| j s|  }|t  7 }|d| dddg7 }|  }|  }|  }||d||g7 }|S )Nr|  z-lcudartz-lcublasrj  r{  )rD   rz  rN  r   r   r   r   )rG   r   ry  CPU_ARCH
SIMD_WIDTHCUDA_ENABLEr  r   r   r   E  s(   zTorchCPUOpBuilder.cxx_args)r   r   r   rz  r   r   r5  r   r   r  r   ru  -  s    
ru  )r   ).r;   rw   r   r.  r  pathlibr   r   r   r   r   distutils.ccompilerr   distutils.logdistutils.sysconfigdistutils.errorsr   r   abcr   r   typingr   YELLOWENDr=   DEFAULT_TORCH_EXTENSION_PATHr/   r$   re   r:   r"   rR   r   rf   rg   r   r   r   r+   r1   r9   r@   rA   r  ru  r   r   r   r   <module>   sX   

   o M