[BugFix] Fix: ImportError when building on hopper systems (#20513)

Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>

[BugFix] Fix: ImportError when building on hopper systems (#20513)
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
40b86aa0 · Lucas Wilkinson · GitHub · 43287082 · 40b86aa0 · 40b86aa0
Commit 40b86aa0 (unverified), authored Jul 06, 2025 by Lucas Wilkinson; committed by GitHub on Jul 06, 2025
4 changed files
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -16,7 +16,7 @@
 /vllm/lora @jeejeelee
 /vllm/reasoning @aarnphm
 /vllm/entrypoints @aarnphm
-CMakeLists.txt @tlrmchlsmth
+CMakeLists.txt @tlrmchlsmth @LucasWilkinson
 # Any change to the VllmConfig changes can have a large user-facing impact,
 # so spam a lot of people

--- a/csrc/ops.h
+++ b/csrc/ops.h
@@ -239,11 +239,6 @@ void cutlass_moe_mm(
    torch::Tensor const& b_strides, torch::Tensor const& c_strides,
    bool per_act_token, bool per_out_ch);
-void cutlass_blockwise_scaled_grouped_mm(
-    torch::Tensor& output, const torch::Tensor& a, const torch::Tensor& b,
-    const torch::Tensor& scales_a, const torch::Tensor& scales_b,
-    const torch::Tensor& problem_sizes, const torch::Tensor& expert_offsets);
 void cutlass_fp4_group_mm(
    torch::Tensor& output, const torch::Tensor& a, const torch::Tensor& b,
    const torch::Tensor& a_blockscale, const torch::Tensor& b_blockscales,

--- a/csrc/quantization/cutlass_w8a8/moe/blockwise_scaled_group_mm_sm100.cu
+++ b/csrc/quantization/cutlass_w8a8/moe/blockwise_scaled_group_mm_sm100.cu
+#include "core/registration.h"
 #include <torch/all.h>
 #include <cutlass/arch/arch.h>
@@ -364,4 +366,9 @@ void cutlass_blockwise_scaled_grouped_mm(
    TORCH_CHECK(false, "Unsupported output tensor type");
  }
 #endif
 }
\ No newline at end of file
+TORCH_LIBRARY_IMPL_EXPAND(TORCH_EXTENSION_NAME, CUDA, m) {
+  m.impl("cutlass_blockwise_scaled_grouped_mm",
+         &cutlass_blockwise_scaled_grouped_mm);
+}
--- a/csrc/torch_bindings.cpp
+++ b/csrc/torch_bindings.cpp
@@ -399,8 +399,7 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
      "Tensor scales_a, Tensor scales_b, "
      "Tensor problem_sizes, Tensor expert_offsets) -> ()",
      {stride_tag});
-  ops.impl("cutlass_blockwise_scaled_grouped_mm", torch::kCUDA,
-           &cutlass_blockwise_scaled_grouped_mm);
+  // conditionally compiled so impl registration is in source file
  // cutlass nvfp4 block scaled group GEMM
  ops.def(