[CI/Build][AMD] Fix ref_dynamic_per_token_quant reference implementation on ROCm. (#30291)

Signed-off-by: Randall Smith <ransmith@amd.com>
Co-authored-by: Randall Smith <ransmith@amd.com>

[CI/Build][AMD] Fix ref_dynamic_per_token_quant reference implementation on ROCm. (#30291)
Signed-off-by: Randall Smith <ransmith@amd.com>
Co-authored-by: Randall Smith <ransmith@amd.com>
302b2c1e · rasmith · GitHub · 8f8fda26 · 302b2c1e
Unverified commit 302b2c1e, authored Dec 12, 2025 by rasmith; committed by GitHub on Dec 12, 2025.
Show whitespace changes
Inline Side-by-side

Showing with 4 additions and 9 deletions

tests/kernels/quant_utils.py tests/kernels/quant_utils.py +4 -9

No files found.
--- a/tests/kernels/quant_utils.py
+++ b/tests/kernels/quant_utils.py
@@ -30,16 +30,11 @@ def ref_dynamic_per_token_quant(
        if quant_dtype == torch.int8
        else torch.finfo(quant_dtype)
    )
-    qtype_traits_max = (
+    use_fp8fnuz = (
-        ROCM_FP8FNUZ_MAX
+        current_platform.is_fp8_fnuz() and quant_dtype == current_platform.fp8_dtype()
-        if current_platform.is_rocm() and current_platform.is_fp8_fnuz()
-        else qtype_traits.max
-    )
-    qtype_traits_min = (
-        -ROCM_FP8FNUZ_MAX
-        if current_platform.is_rocm() and current_platform.is_fp8_fnuz()
-        else qtype_traits.min
    )
+    qtype_traits_max = ROCM_FP8FNUZ_MAX if use_fp8fnuz else qtype_traits.max
+    qtype_traits_min = -ROCM_FP8FNUZ_MAX if use_fp8fnuz else qtype_traits.min
    qtype_max = as_float32_tensor(qtype_traits_max)
    s_1 = as_float32_tensor(1.0)
    s_512 = as_float32_tensor(512.0)