sglang commit c32fb7a2 (unverified)
Authored Sep 18, 2025 by sogalin; committed by GitHub Sep 17, 2025

[ROCm] Fix fp8 quantization accuracy issue. (#10558)
Parent: 1ba137e9

Showing 1 changed file with 1 addition and 1 deletion:
  python/sglang/srt/layers/quantization/fp8_utils.py (+1, -1)
python/sglang/srt/layers/quantization/fp8_utils.py @ c32fb7a2

@@ -732,7 +732,7 @@ def apply_fp8_linear(
     # final solution should be: 1. add support to per-tensor activation scaling.
     # 2. solve the torch.compile error from weight_scale.numel() == 1 and x_scale.numel() > 1 (below line#308)
     if _is_hip and weight_scale.numel() == 1:
-        qinput, x_scale = ops.scaled_fp8_quant(
+        qinput, x_scale = scaled_fp8_quant(
             input_2d,
             input_scale,
             use_per_token_if_dynamic=use_per_token_if_dynamic,
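For context on the call being swapped: per the diff, both the ops.-prefixed kernel and the module-level scaled_fp8_quant that replaces it take an activation (input_2d) plus an optional static scale (input_scale) and return the fp8-quantized tensor together with the scale actually used. Below is a minimal pure-PyTorch sketch of the per-tensor case this branch guards (weight_scale.numel() == 1). The helper name and the torch.float8_e4m3fn dtype are assumptions for illustration only; the real call is a fused kernel, and ROCm may map to a different fp8 format.

    # Illustrative sketch only, not the library's implementation.
    import torch

    def per_tensor_fp8_quant_sketch(x: torch.Tensor, scale: torch.Tensor):
        # Hypothetical helper: static per-tensor fp8 quantization.
        fp8_dtype = torch.float8_e4m3fn  # assumed format for this example
        finfo = torch.finfo(fp8_dtype)
        # One scale for the whole tensor: divide, clamp to the fp8 range, cast.
        q = (x / scale).clamp(min=finfo.min, max=finfo.max).to(fp8_dtype)
        return q, scale

    # Example: quantize an activation with a static scale derived from its max.
    x = torch.randn(4, 8)
    scale = x.abs().max() / torch.finfo(torch.float8_e4m3fn).max
    q, s = per_tensor_fp8_quant_sketch(x, scale)

Note that with a static per-tensor scale the returned scale is a single element, which is consistent with the torch.compile concern quoted in the hunk's comment (weight_scale.numel() == 1 combined with x_scale.numel() > 1).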