[Bugfix] Fix fp8 DeepGemm compilation issues (#30336)

2e7035dd · ElizaWszola · GitHub · 4c2e10ea · 2e7035dd
Unverified commit 2e7035dd, authored Dec 10, 2025 by ElizaWszola; committed by GitHub Dec 09, 2025.
Hide whitespace changes
Inline Side-by-side

Showing with 2 additions and 2 deletions

vllm/model_executor/layers/quantization/utils/fp8_utils.py +2 -2

No files found.
--- a/vllm/model_executor/layers/quantization/utils/fp8_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/fp8_utils.py
@@ -31,7 +31,6 @@ from vllm.model_executor.utils import replace_parameter
 from vllm.platforms import current_platform
 from vllm.triton_utils import tl, triton
 from vllm.utils.deep_gemm import (
-    DeepGemmQuantScaleFMT,
    fp8_gemm_nt,
    is_deep_gemm_e8m0_used,
    is_deep_gemm_supported,
@@ -248,6 +247,7 @@ class W8A8BlockFp8LinearOp:
        self.act_quant_group_shape = act_quant_group_shape
        self.is_deep_gemm_supported = is_deep_gemm_supported()
        self.is_hopper = current_platform.is_device_capability(90)
+        self.is_blackwell = current_platform.is_device_capability(100)
        self.use_deep_gemm_e8m0 = is_deep_gemm_e8m0_used()

        # Get the correct blockscale mul and input quant operations.
@@ -303,7 +303,7 @@ class W8A8BlockFp8LinearOp:
        weight: torch.Tensor,
        weight_scale: torch.Tensor,
    ) -> torch.Tensor:
-        if DeepGemmQuantScaleFMT.from_oracle() == DeepGemmQuantScaleFMT.UE8M0:
+        if self.use_deep_gemm_e8m0 and self.is_blackwell:
            q_input, input_scale = per_token_group_quant_fp8_packed_for_deepgemm(
                input_2d,
                group_size=self.act_quant_group_shape.col,