Fix torch compile errors (#5158)

88d6fd9a · kk · GitHub · cc88d98a · 88d6fd9a
Unverified commit 88d6fd9a, authored Apr 08, 2025 by kk; committed by GitHub Apr 08, 2025
Show whitespace changes
Inline Side-by-side

Showing with 2 additions and 2 deletions

python/sglang/srt/layers/quantization/fp8_utils.py +2 -2

No files found.
--- a/python/sglang/srt/layers/quantization/fp8_utils.py
+++ b/python/sglang/srt/layers/quantization/fp8_utils.py
@@ -243,8 +243,8 @@ def apply_fp8_linear(
        if _is_cuda:
            qinput, x_scale = sglang_per_token_quant_fp8(input_2d)
        else:
-            qinput, x_scale = per_token_group_quant_fp8(
+            qinput, x_scale = ops.scaled_fp8_quant(
-                input_2d, group_size=input_2d.shape[1]
+                input_2d, input_scale, use_per_token_if_dynamic=use_per_token_if_dynamic
            )
    if cutlass_fp8_supported: