Unverified commit a27825ae, authored by fzyzcjy, committed by GitHub
Browse files

Support not officially supported high sgl-kernel version with low srt version (#11786)

parent ce399e15
...@@ -265,6 +265,8 @@ from sgl_kernel.gemm import ( ...@@ -265,6 +265,8 @@ from sgl_kernel.gemm import (
scaled_fp4_quant, scaled_fp4_quant,
sgl_per_tensor_quant_fp8, sgl_per_tensor_quant_fp8,
sgl_per_token_group_quant_8bit, sgl_per_token_group_quant_8bit,
sgl_per_token_group_quant_fp8,
sgl_per_token_group_quant_int8,
sgl_per_token_quant_fp8, sgl_per_token_quant_fp8,
shuffle_rows, shuffle_rows,
silu_and_mul_scaled_fp4_grouped_quant, silu_and_mul_scaled_fp4_grouped_quant,
......
...@@ -137,6 +137,11 @@ def sgl_per_token_group_quant_8bit( ...@@ -137,6 +137,11 @@ def sgl_per_token_group_quant_8bit(
) )
# Legacy aliases: keep the old fp8/int8 names importable; both refer to sgl_per_token_group_quant_8bit
sgl_per_token_group_quant_fp8 = sgl_per_token_group_quant_8bit
sgl_per_token_group_quant_int8 = sgl_per_token_group_quant_8bit
def sgl_per_tensor_quant_fp8( def sgl_per_tensor_quant_fp8(
input: torch.Tensor, input: torch.Tensor,
output_q: torch.Tensor, output_q: torch.Tensor,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment