[BugFix/Build] Fix sparse kernels not getting built on hopper (#14572)

Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>

[BugFix/Build] Fix sparse kernels not getting built on hopper (#14572)
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
07b4b7a3 · Lucas Wilkinson · GitHub · 07964e2f · 07b4b7a3 · 07b4b7a3
Unverified commit 07b4b7a3, authored Mar 11, 2025 by Lucas Wilkinson; committed by GitHub on Mar 11, 2025
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 8 additions and 3 deletions

CMakeLists.txt +2 -1

csrc/sparse/cutlass/sparse_scaled_mm_entry.cu +6 -2

No files found.
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -422,7 +422,8 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
  # 2:4 Sparse Kernels

  # The 2:4 sparse kernels cutlass_scaled_sparse_mm and cutlass_compressor
-  # require CUDA 12.2 or later (and only work on Hopper and Blackwell).
+  # require CUDA 12.2 or later (and only work on Hopper).
+  cuda_archs_loose_intersection(SCALED_MM_ARCHS "9.0a;" "${CUDA_ARCHS}")
  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER 12.2 AND SCALED_MM_ARCHS)
    set(SRCS "csrc/sparse/cutlass/sparse_scaled_mm_c3x.cu")
    set_gencode_flags_for_srcs(

--- a/csrc/sparse/cutlass/sparse_scaled_mm_entry.cu
+++ b/csrc/sparse/cutlass/sparse_scaled_mm_entry.cu
@@ -58,7 +58,9 @@ void cutlass_scaled_sparse_mm(torch::Tensor& c, torch::Tensor const& a,

  // Guard against compilation issues for sm90 kernels
 #if defined ENABLE_SPARSE_SCALED_MM_C3X && ENABLE_SPARSE_SCALED_MM_C3X
-  if (version_num >= 90) {
+  // We build for 9.0a which is not forward compatible, so restrict this to
+  // Hopper only
+  if (version_num == 90) {
    cutlass_scaled_sparse_mm_sm90(c, a, bt_nzs, bt_meta, a_scales, b_scales,
                                  bias);
    return;
@@ -82,7 +84,9 @@ std::vector<torch::Tensor> cutlass_sparse_compress(torch::Tensor const& a) {

  // Guard against compilation issues for sm90 kernels
 #if defined ENABLE_SPARSE_SCALED_MM_C3X && ENABLE_SPARSE_SCALED_MM_C3X
-  if (version_num >= 90) {
+  // We build for 9.0a which is not forward compatible, so restrict this to
+  // Hopper only
+  if (version_num == 90) {
    std::vector<torch::Tensor> result_tensors;

    auto [a_meta, a_nzs] = cutlass_sparse_compress_sm90(a);