Remove assertions about per group quant fp8 (#8717)

403566bc · fzyzcjy · GitHub · 0a56b721 · 403566bc
Unverified Commit 403566bc authored Aug 03, 2025 by fzyzcjy Committed by GitHub Aug 02, 2025
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 0 additions and 4 deletions

python/sglang/srt/layers/quantization/fp8_kernel.py python/sglang/srt/layers/quantization/fp8_kernel.py +0 -4

No files found.
--- a/python/sglang/srt/layers/quantization/fp8_kernel.py
+++ b/python/sglang/srt/layers/quantization/fp8_kernel.py
@@ -354,10 +354,6 @@ def sglang_per_token_group_quant_fp8(
    ), "the last dimension of `x` cannot be divisible by `group_size`"
    assert x.is_contiguous(), "`x` is not contiguous"

-    if scale_ue8m0:
-        # TODO: handle this case by fixing the (token=4, dim=256, group_size=128) UT case
-        assert x.shape[-1] % (group_size * 4) == 0
-
    x_q = torch.empty_like(x, device=x.device, dtype=fp8_dtype)
    x_s = create_per_token_group_quant_fp8_output_scale(
        x_shape=x.shape,