Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
2e7035dd
Unverified
Commit
2e7035dd
authored
Dec 10, 2025
by
ElizaWszola
Committed by
GitHub
Dec 09, 2025
Browse files
[Bugfix] Fix fp8 DeepGemm compilation issues (#30336)
parent
4c2e10ea
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
2 additions
and
2 deletions
+2
-2
vllm/model_executor/layers/quantization/utils/fp8_utils.py
vllm/model_executor/layers/quantization/utils/fp8_utils.py
+2
-2
No files found.
vllm/model_executor/layers/quantization/utils/fp8_utils.py
View file @
2e7035dd
...
...
@@ -31,7 +31,6 @@ from vllm.model_executor.utils import replace_parameter
from
vllm.platforms
import
current_platform
from
vllm.triton_utils
import
tl
,
triton
from
vllm.utils.deep_gemm
import
(
DeepGemmQuantScaleFMT
,
fp8_gemm_nt
,
is_deep_gemm_e8m0_used
,
is_deep_gemm_supported
,
...
...
@@ -248,6 +247,7 @@ class W8A8BlockFp8LinearOp:
self
.
act_quant_group_shape
=
act_quant_group_shape
self
.
is_deep_gemm_supported
=
is_deep_gemm_supported
()
self
.
is_hopper
=
current_platform
.
is_device_capability
(
90
)
self
.
is_blackwell
=
current_platform
.
is_device_capability
(
100
)
self
.
use_deep_gemm_e8m0
=
is_deep_gemm_e8m0_used
()
# Get the correct blockscale mul and input quant operations.
...
...
@@ -303,7 +303,7 @@ class W8A8BlockFp8LinearOp:
weight
:
torch
.
Tensor
,
weight_scale
:
torch
.
Tensor
,
)
->
torch
.
Tensor
:
if
DeepGemmQuantScaleFMT
.
from_oracle
()
==
DeepGemmQuantScaleFMT
.
UE8M0
:
if
self
.
use_deep_gemm_e8m0
and
self
.
is_blackwell
:
q_input
,
input_scale
=
per_token_group_quant_fp8_packed_for_deepgemm
(
input_2d
,
group_size
=
self
.
act_quant_group_shape
.
col
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment