[Benchmarks] Fix imports in FP8 tuning script (#26407)

Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com>

[Benchmarks] Fix imports in FP8 tuning script (#26407)
Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com>
6273fe8d · Lukas Geiger · GitHub · 9fb3ae4e · 6273fe8d
Unverified commit 6273fe8d, authored Oct 08, 2025 by Lukas Geiger; committed by GitHub on Oct 08, 2025
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 2 additions and 2 deletions

benchmarks/kernels/benchmark_w8a8_block_fp8.py (+2 -2)

No files found.
--- a/benchmarks/kernels/benchmark_w8a8_block_fp8.py
+++ b/benchmarks/kernels/benchmark_w8a8_block_fp8.py
@@ -14,7 +14,7 @@ import torch
 from tqdm import tqdm

 from vllm.model_executor.layers.quantization.utils.fp8_utils import (
-    _w8a8_block_fp8_matmul,
+    _w8a8_triton_block_scaled_mm,
 )
 from vllm.platforms import current_platform
 from vllm.triton_utils import triton
@@ -83,7 +83,7 @@ def w8a8_block_matmul(
        )

    if A.dtype == torch.float8_e4m3fn:
-        kernel = _w8a8_block_fp8_matmul
+        kernel = _w8a8_triton_block_scaled_mm
    else:
        raise RuntimeError("Currently, only support tune w8a8 block fp8 kernel.")