Unverified commit 88d6fd9a, authored by kk, committed by GitHub
Browse files

Fix torch compile errors (#5158)

parent cc88d98a
......@@ -243,8 +243,8 @@ def apply_fp8_linear(
if _is_cuda:
qinput, x_scale = sglang_per_token_quant_fp8(input_2d)
else:
qinput, x_scale = per_token_group_quant_fp8(
input_2d, group_size=input_2d.shape[1]
qinput, x_scale = ops.scaled_fp8_quant(
input_2d, input_scale, use_per_token_if_dynamic=use_per_token_if_dynamic
)
if cutlass_fp8_supported:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment