Commit 403566bc (unverified), authored by fzyzcjy, committed by GitHub
Browse files

Remove assertions about per group quant fp8 (#8717)

parent 0a56b721
...@@ -354,10 +354,6 @@ def sglang_per_token_group_quant_fp8( ...@@ -354,10 +354,6 @@ def sglang_per_token_group_quant_fp8(
), "the last dimension of `x` cannot be divisible by `group_size`" ), "the last dimension of `x` cannot be divisible by `group_size`"
assert x.is_contiguous(), "`x` is not contiguous" assert x.is_contiguous(), "`x` is not contiguous"
if scale_ue8m0:
# TODO: handle this case by fixing the (token=4, dim=256, group_size=128) UT case
assert x.shape[-1] % (group_size * 4) == 0
x_q = torch.empty_like(x, device=x.device, dtype=fp8_dtype) x_q = torch.empty_like(x, device=x.device, dtype=fp8_dtype)
x_s = create_per_token_group_quant_fp8_output_scale( x_s = create_per_token_group_quant_fp8_output_scale(
x_shape=x.shape, x_shape=x.shape,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment