Unverified commit 385599cb, authored by fzyzcjy and committed by GitHub
Browse files

Fix error when calling quantization (#12548)

parent 952fbe47
...@@ -56,6 +56,8 @@ try: ...@@ -56,6 +56,8 @@ try:
from flashinfer import fp4_quantize from flashinfer import fp4_quantize
else: else:
from sgl_kernel import scaled_fp4_quant as fp4_quantize from sgl_kernel import scaled_fp4_quant as fp4_quantize
from flashinfer import fp4_quantize as fp4_quantize_flashinfer
except ImportError: except ImportError:
fp4_quantize = None fp4_quantize = None
...@@ -1571,7 +1573,7 @@ class ModelOptNvFp4FusedMoEMethod(FusedMoEMethodBase): ...@@ -1571,7 +1573,7 @@ class ModelOptNvFp4FusedMoEMethod(FusedMoEMethodBase):
# Quantize before comm, swizzle after. # Quantize before comm, swizzle after.
if x.shape[0] > 0: if x.shape[0] > 0:
x, x_sf = fp4_quantize( x, x_sf = fp4_quantize_flashinfer(
x, layer.w13_input_scale_quant, is_sf_swizzled_layout=False x, layer.w13_input_scale_quant, is_sf_swizzled_layout=False
) )
else: else:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment