Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
40b86aa0
Unverified
Commit
40b86aa0
authored
Jul 06, 2025
by
Lucas Wilkinson
Committed by
GitHub
Jul 06, 2025
Browse files
[BugFix] Fix: ImportError when building on hopper systems (#20513)
Signed-off-by:
Lucas Wilkinson
<
lwilkins@redhat.com
>
parent
43287082
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
10 additions
and
9 deletions
+10
-9
.github/CODEOWNERS
.github/CODEOWNERS
+1
-1
csrc/ops.h
csrc/ops.h
+0
-5
csrc/quantization/cutlass_w8a8/moe/blockwise_scaled_group_mm_sm100.cu
...ation/cutlass_w8a8/moe/blockwise_scaled_group_mm_sm100.cu
+8
-1
csrc/torch_bindings.cpp
csrc/torch_bindings.cpp
+1
-2
No files found.
.github/CODEOWNERS
View file @
40b86aa0
...
@@ -16,7 +16,7 @@
...
@@ -16,7 +16,7 @@
/vllm/lora @jeejeelee
/vllm/lora @jeejeelee
/vllm/reasoning @aarnphm
/vllm/reasoning @aarnphm
/vllm/entrypoints @aarnphm
/vllm/entrypoints @aarnphm
CMakeLists.txt @tlrmchlsmth
CMakeLists.txt @tlrmchlsmth
@LucasWilkinson
# Any change to the VllmConfig changes can have a large user-facing impact,
# Any change to the VllmConfig changes can have a large user-facing impact,
# so spam a lot of people
# so spam a lot of people
...
...
csrc/ops.h
View file @
40b86aa0
...
@@ -239,11 +239,6 @@ void cutlass_moe_mm(
...
@@ -239,11 +239,6 @@ void cutlass_moe_mm(
torch
::
Tensor
const
&
b_strides
,
torch
::
Tensor
const
&
c_strides
,
torch
::
Tensor
const
&
b_strides
,
torch
::
Tensor
const
&
c_strides
,
bool
per_act_token
,
bool
per_out_ch
);
bool
per_act_token
,
bool
per_out_ch
);
void
cutlass_blockwise_scaled_grouped_mm
(
torch
::
Tensor
&
output
,
const
torch
::
Tensor
&
a
,
const
torch
::
Tensor
&
b
,
const
torch
::
Tensor
&
scales_a
,
const
torch
::
Tensor
&
scales_b
,
const
torch
::
Tensor
&
problem_sizes
,
const
torch
::
Tensor
&
expert_offsets
);
void
cutlass_fp4_group_mm
(
void
cutlass_fp4_group_mm
(
torch
::
Tensor
&
output
,
const
torch
::
Tensor
&
a
,
const
torch
::
Tensor
&
b
,
torch
::
Tensor
&
output
,
const
torch
::
Tensor
&
a
,
const
torch
::
Tensor
&
b
,
const
torch
::
Tensor
&
a_blockscale
,
const
torch
::
Tensor
&
b_blockscales
,
const
torch
::
Tensor
&
a_blockscale
,
const
torch
::
Tensor
&
b_blockscales
,
...
...
csrc/quantization/cutlass_w8a8/moe/blockwise_scaled_group_mm_sm100.cu
View file @
40b86aa0
#include "core/registration.h"
#include <torch/all.h>
#include <torch/all.h>
#include <cutlass/arch/arch.h>
#include <cutlass/arch/arch.h>
...
@@ -364,4 +366,9 @@ void cutlass_blockwise_scaled_grouped_mm(
...
@@ -364,4 +366,9 @@ void cutlass_blockwise_scaled_grouped_mm(
TORCH_CHECK
(
false
,
"Unsupported output tensor type"
);
TORCH_CHECK
(
false
,
"Unsupported output tensor type"
);
}
}
#endif
#endif
}
}
\ No newline at end of file
TORCH_LIBRARY_IMPL_EXPAND
(
TORCH_EXTENSION_NAME
,
CUDA
,
m
)
{
m
.
impl
(
"cutlass_blockwise_scaled_grouped_mm"
,
&
cutlass_blockwise_scaled_grouped_mm
);
}
csrc/torch_bindings.cpp
View file @
40b86aa0
...
@@ -399,8 +399,7 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
...
@@ -399,8 +399,7 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
"Tensor scales_a, Tensor scales_b, "
"Tensor scales_a, Tensor scales_b, "
"Tensor problem_sizes, Tensor expert_offsets) -> ()"
,
"Tensor problem_sizes, Tensor expert_offsets) -> ()"
,
{
stride_tag
});
{
stride_tag
});
ops
.
impl
(
"cutlass_blockwise_scaled_grouped_mm"
,
torch
::
kCUDA
,
// conditionally compiled so impl registration is in source file
&
cutlass_blockwise_scaled_grouped_mm
);
// cutlass nvfp4 block scaled group GEMM
// cutlass nvfp4 block scaled group GEMM
ops
.
def
(
ops
.
def
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment