Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
3ef9fd0f
Unverified
Commit
3ef9fd0f
authored
Feb 23, 2026
by
Michael Goin
Committed by
GitHub
Feb 23, 2026
Browse files
[Bugfix] Fix DSV3 kernels breaking _C and _moe_C on unsupported arches (#35123)
Signed-off-by:
mgoin
<
mgoin64@gmail.com
>
parent
22a97e66
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
12 additions
and
3 deletions
+12
-3
CMakeLists.txt
CMakeLists.txt
+0
-1
csrc/dsv3_fused_a_gemm.cu
csrc/dsv3_fused_a_gemm.cu
+4
-0
csrc/moe/dsv3_router_gemm_entry.cu
csrc/moe/dsv3_router_gemm_entry.cu
+6
-0
csrc/moe/torch_bindings.cpp
csrc/moe/torch_bindings.cpp
+1
-1
csrc/torch_bindings.cpp
csrc/torch_bindings.cpp
+1
-1
No files found.
CMakeLists.txt
View file @
3ef9fd0f
...
@@ -783,7 +783,6 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
...
@@ -783,7 +783,6 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
SRCS
"
${
DSV3_FUSED_A_GEMM_SRC
}
"
SRCS
"
${
DSV3_FUSED_A_GEMM_SRC
}
"
CUDA_ARCHS
"
${
DSV3_FUSED_A_GEMM_ARCHS
}
"
)
CUDA_ARCHS
"
${
DSV3_FUSED_A_GEMM_ARCHS
}
"
)
list
(
APPEND VLLM_EXT_SRC
${
DSV3_FUSED_A_GEMM_SRC
}
)
list
(
APPEND VLLM_EXT_SRC
${
DSV3_FUSED_A_GEMM_SRC
}
)
list
(
APPEND VLLM_GPU_FLAGS
"-DENABLE_DSV3_FUSED_A_GEMM=1"
)
message
(
STATUS
"Building dsv3_fused_a_gemm for archs:
${
DSV3_FUSED_A_GEMM_ARCHS
}
"
)
message
(
STATUS
"Building dsv3_fused_a_gemm for archs:
${
DSV3_FUSED_A_GEMM_ARCHS
}
"
)
else
()
else
()
message
(
STATUS
"Not building dsv3_fused_a_gemm as no compatible archs found "
message
(
STATUS
"Not building dsv3_fused_a_gemm as no compatible archs found "
...
...
csrc/dsv3_fused_a_gemm.cu
View file @
3ef9fd0f
...
@@ -745,3 +745,7 @@ void dsv3_fused_a_gemm(torch::Tensor& output, torch::Tensor const& mat_a,
...
@@ -745,3 +745,7 @@ void dsv3_fused_a_gemm(torch::Tensor& output, torch::Tensor const& mat_a,
stream
);
stream
);
}
}
}
}
TORCH_LIBRARY_IMPL_EXPAND
(
TORCH_EXTENSION_NAME
,
CUDA
,
m
)
{
m
.
impl
(
"dsv3_fused_a_gemm"
,
&
dsv3_fused_a_gemm
);
}
csrc/moe/dsv3_router_gemm_entry.cu
View file @
3ef9fd0f
...
@@ -20,10 +20,12 @@
...
@@ -20,10 +20,12 @@
#include <ATen/ATen.h>
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
#include <ATen/cuda/CUDAContext.h>
#include <torch/all.h>
#include <cuda_bf16.h>
#include <cuda_bf16.h>
#include <cuda_runtime.h>
#include <cuda_runtime.h>
#include "core/registration.h"
#include "dsv3_router_gemm_utils.h"
#include "dsv3_router_gemm_utils.h"
static
constexpr
int
DEFAULT_NUM_EXPERTS
=
256
;
static
constexpr
int
DEFAULT_NUM_EXPERTS
=
256
;
...
@@ -161,3 +163,7 @@ void dsv3_router_gemm(at::Tensor& output, // [num_tokens, num_experts]
...
@@ -161,3 +163,7 @@ void dsv3_router_gemm(at::Tensor& output, // [num_tokens, num_experts]
}
}
}
}
}
}
TORCH_LIBRARY_IMPL_EXPAND
(
TORCH_EXTENSION_NAME
,
CUDA
,
m
)
{
m
.
impl
(
"dsv3_router_gemm"
,
&
dsv3_router_gemm
);
}
csrc/moe/torch_bindings.cpp
View file @
3ef9fd0f
...
@@ -127,7 +127,7 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, m) {
...
@@ -127,7 +127,7 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, m) {
// DeepSeek V3 optimized router GEMM for SM90+
// DeepSeek V3 optimized router GEMM for SM90+
m
.
def
(
"dsv3_router_gemm(Tensor! output, Tensor mat_a, Tensor mat_b) -> ()"
);
m
.
def
(
"dsv3_router_gemm(Tensor! output, Tensor mat_a, Tensor mat_b) -> ()"
);
m
.
impl
(
"dsv3_router_gemm"
,
torch
::
kCUDA
,
&
dsv3_router_gemm
);
// conditionally compiled so impl registration is in source file
#endif
#endif
}
}
...
...
csrc/torch_bindings.cpp
View file @
3ef9fd0f
...
@@ -242,7 +242,7 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
...
@@ -242,7 +242,7 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
// DeepSeek V3 fused A GEMM (SM 9.0+, bf16 only, 1-16 tokens).
// DeepSeek V3 fused A GEMM (SM 9.0+, bf16 only, 1-16 tokens).
ops
.
def
(
ops
.
def
(
"dsv3_fused_a_gemm(Tensor! output, Tensor mat_a, Tensor mat_b) -> ()"
);
"dsv3_fused_a_gemm(Tensor! output, Tensor mat_a, Tensor mat_b) -> ()"
);
ops
.
impl
(
"dsv3_fused_a_gemm"
,
torch
::
kCUDA
,
&
dsv3_fused_a_gemm
);
// conditionally compiled so impl registration is in source file
// Quantized GEMM for AWQ.
// Quantized GEMM for AWQ.
ops
.
def
(
ops
.
def
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment