[CI][AMD][Quantization][BugFix] Fix fp8 max in quant_utils.py and update...

[CI][AMD][Quantization][BugFix] Fix fp8 max in quant_utils.py and update test_fp8_quant.py::test_static_fp8_quant_group_2d to use correct fp8 dtype and adjust atol/rtol (#32201) Signed-off-by: Randall Smith <ransmith@amd.com>

[CI][AMD][Quantization][BugFix] Fix fp8 max in quant_utils.py and update...
[CI][AMD][Quantization][BugFix] Fix fp8 max in quant_utils.py and update test_fp8_quant.py::test_static_fp8_quant_group_2d to use correct fp8 dtype and adjust atol/rtol (#32201) Signed-off-by: Randall Smith <ransmith@amd.com>
3c268564 · rasmith · GitHub · 773d7073 · 3c268564 · 3c268564
Unverified Commit 3c268564 authored Jan 14, 2026 by rasmith Committed by GitHub Jan 15, 2026
Showing with 4 additions and 3 deletions

tests/kernels/quantization/test_fp8_quant.py tests/kernels/quantization/test_fp8_quant.py +2 -2

vllm/model_executor/layers/quantization/utils/quant_utils.py vllm/model_executor/layers/quantization/utils/quant_utils.py +2 -1

No files found.
--- a/tests/kernels/quantization/test_fp8_quant.py
+++ b/tests/kernels/quantization/test_fp8_quant.py
@@ -178,12 +178,12 @@ def test_static_fp8_quant_group_2d(
    x = torch.rand(num_tokens, hidden_size, dtype=dtype, device="cuda")
    ref_out, scale = scaled_quantize(
-        x, group_shape, FP8_DTYPE, compute_dtype=torch.float32
+        x, group_shape, current_platform.fp8_dtype(), compute_dtype=torch.float32
    )
    ops_out, ops_scale = ops.scaled_fp8_quant(x, scale=scale, group_shape=group_shape)
    torch.testing.assert_close(scale, ops_scale)
-    torch.testing.assert_close(ref_out.float(), ops_out.float(), rtol=0.12, atol=0.0)
+    torch.testing.assert_close(ref_out.float(), ops_out.float(), rtol=1.2e-1, atol=1e-5)
    opcheck_fp8_quant(ops_out, x, scale=scale)

--- a/vllm/model_executor/layers/quantization/utils/quant_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/quant_utils.py
@@ -221,7 +221,8 @@ def scaled_quantize(
    # Compute scales
    min_val, max_val = x_blkd_permd.aminmax(dim=-1)
    amax = torch.maximum(min_val.abs(), max_val.abs()).clamp(min=1e-12)
-    scale = finfo.max / amax
+    _, fp8_max = get_fp8_min_max()
+    scale = fp8_max / amax
    # Apply scale and convert form:
    # (BLK_M, BLK_N, BLOCK_SIZE_M * BLOCK_SIZE_N) to (M, N)