Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
bcdfb2a3
Unverified
Commit
bcdfb2a3
authored
Jul 15, 2025
by
Michael Goin
Committed by
GitHub
Jul 15, 2025
Browse files
[Bugfix] Fix incorrect dispatch for CutlassBlockScaledGroupedGemm and DeepGEMM (#20933)
Signed-off-by: mgoin <mgoin64@gmail.com>
parent
ba8c3000
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
10 additions
and
5 deletions
+10
-5
vllm/model_executor/layers/quantization/fp8.py
vllm/model_executor/layers/quantization/fp8.py
+10
-5
No files found.
vllm/model_executor/layers/quantization/fp8.py
View file @
bcdfb2a3
...
...
@@ -488,11 +488,16 @@ class Fp8MoEMethod(FusedMoEMethodBase):
logger
.
warning_once
(
"Failed to import DeepGemm kernels."
)
elif
not
self
.
block_quant
:
logger
.
warning_once
(
"Model is not block quantized. Not using "
"DeepGemm kernels"
)
"DeepGemm kernels"
)
elif
(
current_platform
.
is_cuda
()
and
current_platform
.
has_device_capability
(
90
)):
and
current_platform
.
is_device_capability
(
90
)):
logger
.
info_once
(
"Using DeepGemm kernels for Fp8MoEMethod."
)
self
.
allow_deep_gemm
=
True
elif
(
current_platform
.
is_cuda
()
and
is_blackwell_deep_gemm_used
()):
logger
.
info_once
(
"Using DeepGemm SM100 kernels for "
"Fp8MoEMethod."
)
self
.
allow_deep_gemm
=
True
else
:
logger
.
warning_once
(
"DeepGemm not supported on the current platform."
)
...
...
@@ -500,10 +505,10 @@ class Fp8MoEMethod(FusedMoEMethodBase):
# Check for CutlassBlockScaledGroupedGemm support.
self
.
allow_cutlass_block_scaled_grouped_gemm
=
False
if
not
self
.
block_quant
:
logger
.
warning_once
(
"Model is not block quantized. Not using "
"CutlassBlockScaledGroupedGemm kernels"
)
logger
.
debug_once
(
"Model is not block quantized. Not using "
"CutlassBlockScaledGroupedGemm kernels"
)
elif
(
current_platform
.
is_cuda
()
and
current_platform
.
has_device_capability
(
100
)):
and
current_platform
.
is_device_capability
(
100
)):
logger
.
info_once
(
"Using CutlassBlockScaledGroupedGemm kernels for Fp8MoEMethod."
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment