Unverified commit a27825ae, authored by fzyzcjy and committed by GitHub
Browse files

Support using a higher sgl-kernel version with a lower srt version, a combination that is not officially supported (#11786)

parent ce399e15
......@@ -265,6 +265,8 @@ from sgl_kernel.gemm import (
scaled_fp4_quant,
sgl_per_tensor_quant_fp8,
sgl_per_token_group_quant_8bit,
sgl_per_token_group_quant_fp8,
sgl_per_token_group_quant_int8,
sgl_per_token_quant_fp8,
shuffle_rows,
silu_and_mul_scaled_fp4_grouped_quant,
......
......@@ -137,6 +137,11 @@ def sgl_per_token_group_quant_8bit(
)
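# Legacy aliases: keep the old fp8/int8 names bound to the unified 8-bit function so existing imports of those names still resolve.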
sgl_per_token_group_quant_fp8 = sgl_per_token_group_quant_8bit
sgl_per_token_group_quant_int8 = sgl_per_token_group_quant_8bit
def sgl_per_tensor_quant_fp8(
input: torch.Tensor,
output_q: torch.Tensor,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment