o
    i-                  	   @   s  d Z ddlmZ ddlmZ ddlmZmZ ddl	m
Z
mZmZmZmZ ddlmZ dd	lmZmZ dd
lmZ ddede
fddZddede
fddZddede
fddZddede
fddZddede
fddZ	ddee dedede
fddZde
fddZdS ) a3  
Copyright (c) 2025 by FlashInfer team.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
    )List   )env   )ArtifactPathCheckSumHash)JitSpecgen_jit_speccurrent_compilation_contextsm90a_nvcc_flagssm89_nvcc_flags)is_cuda_version_at_least)	get_cubinget_meta_hash)generate_gemm_operationsFuse_fast_buildreturnc                 C   &   g d}|t jdgd7 }t|d| S )N)-DCOMPILE_BLACKWELL_TMA_GEMMSz+-DCOMPILE_BLACKWELL_SM120_TMA_GROUPED_GEMMS-DENABLE_BF16-DENABLE_FP8-DENABLE_FP4-DUSING_OSS_CUTLASS_MOE_GEMM   supported_major_versions120r
   get_nvcc_flags_listgen_cutlass_fused_moe_moduler   
nvcc_flags r"   N/home/ubuntu/vllm_env/lib/python3.10/site-packages/flashinfer/jit/fused_moe.py"gen_cutlass_fused_moe_sm120_module!   s
   	r$   c                 C   r   )N)r   %-DCOMPILE_BLACKWELL_TMA_GROUPED_GEMMSr   r   r   r   z+-DCOMPILE_BLACKWELL_SM103_TMA_GROUPED_GEMMS
   r   103r   r    r"   r"   r#   "gen_cutlass_fused_moe_sm103_module2   s
   
r(   c                 C   s(   g d}|t jddgd7 }t|d| S )N)r   r%   r   r   r   r   r&      r   100r   r    r"   r"   r#   "gen_cutlass_fused_moe_sm100_moduleD   s
   	r+   c                 C   s:   t ddddtdrdndtdrdndd	g }t|d
| S )Nz-DCOMPILE_HOPPER_TMA_GEMMSz"-DCOMPILE_HOPPER_TMA_GROUPED_GEMMSr   r   12.8-DENABLE_FP8_BLOCK_SCALE r   r   90)r   r   r   r    r"   r"   r#   !gen_cutlass_fused_moe_sm90_moduleU   s   	r0   c                 C   s(   t ddtdr	dnddg }t|d| S )Nr   r   r,   r-   r.   r   89)r   r   r   r    r"   r"   r#   !gen_cutlass_fused_moe_sm89_moduleb   s   r2   r!   device_archc                    s  t jd|   z jddd t | d| d W n ty0 } ztd| |d}~ww td| t jd	 t jd
 t jd t jd t jd t jd t jd t jd t jd t jd t jd t jd t jd t jd t jd t jd t jd t jd t jd g fdd dD t jd t jd  t jd! t jd" t jd# t jd$ t jd% t jd& | |rd'gng d(gt jd) t jd) d* t jd) d+ d, d* t jd) d+ d- d. d* t jd) d+ d- d.  gd/S )0z>
    Generate a JitSpec for the cutlass fused moe module.
    zcutlass_instantiations/T)parentsexist_ok;z-realz$Failed to generate Cutlass kernels: N
fused_moe_z`nv_internal/tensorrt_llm/kernels/cutlass_kernels/moe_gemm/moe_gemm_tma_warp_specialized_input.cuzWnv_internal/tensorrt_llm/kernels/cutlass_kernels/moe_gemm/moe_gemm_kernels_fp8_uint4.cuzUnv_internal/tensorrt_llm/kernels/cutlass_kernels/moe_gemm/moe_gemm_kernels_fp8_fp8.cuzUnv_internal/tensorrt_llm/kernels/cutlass_kernels/moe_gemm/moe_gemm_kernels_fp8_fp4.cuzUnv_internal/tensorrt_llm/kernels/cutlass_kernels/moe_gemm/moe_gemm_kernels_fp4_fp4.cuzWnv_internal/tensorrt_llm/kernels/cutlass_kernels/moe_gemm/moe_gemm_kernels_fp32_fp32.cuzXnv_internal/tensorrt_llm/kernels/cutlass_kernels/moe_gemm/moe_gemm_kernels_fp16_uint8.cuzXnv_internal/tensorrt_llm/kernels/cutlass_kernels/moe_gemm/moe_gemm_kernels_fp16_uint4.cuzWnv_internal/tensorrt_llm/kernels/cutlass_kernels/moe_gemm/moe_gemm_kernels_fp16_fp16.cuzXnv_internal/tensorrt_llm/kernels/cutlass_kernels/moe_gemm/moe_gemm_kernels_bf16_uint8.cuzXnv_internal/tensorrt_llm/kernels/cutlass_kernels/moe_gemm/moe_gemm_kernels_bf16_uint4.cuzVnv_internal/tensorrt_llm/kernels/cutlass_kernels/moe_gemm/moe_gemm_kernels_bf16_fp8.cuzWnv_internal/tensorrt_llm/kernels/cutlass_kernels/moe_gemm/moe_gemm_kernels_bf16_bf16.cuzVnv_internal/tensorrt_llm/kernels/cutlass_kernels/moe_gemm/moe_gemm_kernels_bf16_fp4.cuzVnv_internal/tensorrt_llm/kernels/cutlass_kernels/moe_gemm/moe_gemm_kernels_fp16_fp4.cuz[nv_internal/tensorrt_llm/kernels/cutlass_kernels/fp8_blockscale_gemm/fp8_blockscale_gemm.cuzAfused_moe/cutlass_backend/flashinfer_cutlass_fused_moe_binding.cuz/fused_moe/cutlass_backend/deepgemm_jit_setup.cuz<fused_moe/cutlass_backend/cutlass_fused_moe_instantiation.cuc                 3   s    | ]} | V  qd S )Nr"   ).0kernel
output_dirr"   r#   	<genexpr>   s    z/gen_cutlass_fused_moe_module.<locals>.<genexpr>z*.generated.cu#nv_internal/cpp/common/envUtils.cpp!nv_internal/cpp/common/logger.cpp&nv_internal/cpp/common/stringUtils.cpp(nv_internal/cpp/common/tllmException.cpp%nv_internal/cpp/common/memoryUtils.cuz7nv_internal/tensorrt_llm/kernels/preQuantScaleKernel.cuzFnv_internal/tensorrt_llm/kernels/cutlass_kernels/cutlass_heuristic.cppz.nv_internal/tensorrt_llm/kernels/lora/lora.cppz-DFAST_BUILDz-lnvrtcnv_internalincludetensorrt_llmcutlass_extensionskernelscutlass_kernels)extra_cuda_cflagsextra_cflagsextra_ldflagsextra_include_paths)	jit_envFLASHINFER_GEN_SRC_DIRmkdirr   	ExceptionRuntimeErrorr	   FLASHINFER_CSRC_DIRrglob)r!   r3   r   er"   r:   r#   r   l   s
  	()*+,-.025r   c                  C   s  t j d} d}t j d}t|tj}|sJ d| t|}t|  d| d|}|s5J | dtjdgd	}td
tj	d tj	d tj	d tj	d tj	d tj	d tj	d tj	d tj	d tj	d tj	d tj	d gdddddddt j dg| tj
|  tj	d tj	d  gd!S )"Nz/includeflashinferMetaInfoz/checksums.txtz!Failed to get checksums.txt from /z.hz.h not foundr&   r   fused_moe_trtllm_sm100r=   r>   r?   r@   rA   z#trtllm_fused_moe_kernel_launcher.cuztrtllm_fused_moe_runner.cuz$trtllm_fused_moe_routing_deepseek.cuz"trtllm_fused_moe_routing_llama4.cuz'trtllm_fused_moe_routing_renormalize.cuztrtllm_fused_moe_dev_kernel.cuztrtllm_batched_gemm_runner.cuz-DTLLM_GEN_EXPORT_INTERFACEz-DTLLM_GEN_EXPORT_FLASHINFERz-DTLLM_ENABLE_CUDAr   r   r   z-DTLLM_GEN_GEMM_CUBIN_PATH=\"z\"rB   znv_internal/include)rH   rK   )r   TRTLLM_GEN_BMMr   r   r   r
   r   r	   rL   rQ   FLASHINFER_CUBIN_DIR)include_pathheader_namechecksum_pathchecksum	meta_hashmetainfor!   r"   r"   r#   %gen_trtllm_gen_fused_moe_sm100_module   sX   	r_   N)F)__doc__typingr   r.   r   rL   	artifactsr   r   corer   r	   r
   r   r   cpp_extr   cubin_loaderr   r   gemm.cutlass.generate_kernelsr   boolr$   r(   r+   r0   r2   strr   r_   r"   r"   r"   r#   <module>   s0    
k