o
    Ti-                     @   s,   d dl Z ddlmZmZ G dd deZdS )    N   )CUDAOpBuilderinstalled_cuda_versionc                       sd   e Zd ZdZdZd fdd	Zdd Zd fd	d
	Zdd Zdd Z	dd Z
dd Zdd Z  ZS )InferenceCutlassBuilderDS_BUILD_CUTLASS_OPScutlass_opsNc                    s$   |d u r| j n|}t j|d d S )N)name)NAMEsuper__init__)selfr   	__class__ f/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/ops/op_builder/inference_cutlass_builder.pyr      s   z InferenceCutlassBuilder.__init__c                 C   s   d| j  S )Nz+deepspeed.inference.v2.kernels.cutlass_ops.)r	   )r   r   r   r   absolute_name   s   z%InferenceCutlassBuilder.absolute_nameFc                    s   zdd l }W n ty   |r| d Y dS w d}|  s\|j r\t \}}t|jj	dd }|j
dj}|dk rG|rE| d d}|dkr\|d	k sS|d	k r\|rZ| d
 d}t |oc|S )Nr   z?Please install torch if trying to pre-compile inference kernelsFT.   zDNVIDIA Inference is only supported on Pascal and newer architectures      z6On Ampere and higher architectures please use CUDA 11+)torchImportErrorwarningis_rocm_pytorchcudais_availabler   intversionsplitget_device_propertiesmajorr
   is_compatible)r   verboser   	cuda_okaysys_cuda_major_torch_cuda_majorcuda_capabilityr   r   r   r!      s,   



z%InferenceCutlassBuilder.is_compatiblec                 C   sb   g }g }dd |D D ]}t |d dkr|| q|| qt|dkr/| d|  |S )Nc                 S   s   g | ]}| d qS )r   )r   ).0ccr   r   r   
<listcomp>0   s    z6InferenceCutlassBuilder.filter_ccs.<locals>.<listcomp>r   r   zFiltered compute capabilities )r   appendlenr   )r   ccsccs_retained
ccs_prunedr)   r   r   r   
filter_ccs-   s   z"InferenceCutlassBuilder.filter_ccsc                 C   s   |  d}tj|rdS dS )N	deepspeedz..)deepspeed_src_pathospathisdir)r   ds_pathr   r   r   
get_prefix:   s   
z"InferenceCutlassBuilder.get_prefixc                    &   g d}|     fdd|D }|S )N)z0inference/v2/kernels/cutlass_ops/cutlass_ops.cppz9inference/v2/kernels/cutlass_ops/mixed_gemm/mixed_gemm.cuz5inference/v2/kernels/cutlass_ops/moe_gemm/moe_gemm.cuc                       g | ]	}t j |qS r   r3   r4   joinr(   srcprefixr   r   r*   F       z3InferenceCutlassBuilder.sources.<locals>.<listcomp>r7   r   sourcesr   r>   r   rC   >   s   zInferenceCutlassBuilder.sourcesc                 C   sX   dd l }| }|  }tj||}| |}d| dg}| jr*|d|  |S )Nr   z-Lz-ldeepspeedftz-Wl,-rpath,)		dskernelslibrary_pathr7   r3   r4   r;   r2   jit_loadr+   )r   rD   lib_pathr?   argsr   r   r   extra_ldflagsI   s   
z%InferenceCutlassBuilder.extra_ldflagsc                    r8   )N)zinference/v2/kernels/includesz+inference/v2/kernels/cutlass_ops/mixed_gemmz)inference/v2/kernels/cutlass_ops/moe_gemmz2inference/v2/kernels/cutlass_ops/shared_resources/c                    r9   r   r:   r<   r>   r   r   r*   ^   r@   z9InferenceCutlassBuilder.include_paths.<locals>.<listcomp>rA   rB   r   r>   r   include_pathsU   s   z%InferenceCutlassBuilder.include_paths)N)F)__name__
__module____qualname__	BUILD_VARr	   r   r   r!   r0   r7   rC   rI   rJ   __classcell__r   r   r   r   r   
   s    r   )r3   builderr   r   r   r   r   r   r   <module>   s   