Unverified commit 6273fe8d, authored by Lukas Geiger, committed by GitHub
Browse files

[Benchmarks] Fix imports in FP8 tuning script (#26407)


Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com>
parent 9fb3ae4e
......@@ -14,7 +14,7 @@ import torch
from tqdm import tqdm
from vllm.model_executor.layers.quantization.utils.fp8_utils import (
-_w8a8_block_fp8_matmul,
+_w8a8_triton_block_scaled_mm,
)
from vllm.platforms import current_platform
from vllm.triton_utils import triton
......@@ -83,7 +83,7 @@ def w8a8_block_matmul(
)
if A.dtype == torch.float8_e4m3fn:
-kernel = _w8a8_block_fp8_matmul
+kernel = _w8a8_triton_block_scaled_mm
else:
raise RuntimeError("Currently, only support tune w8a8 block fp8 kernel.")
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment