Unverified commit 8ebf72fe authored by kk, committed by GitHub

[Fix] RuntimeError: get_cfg Unsupported input_type:Float4_e2m1fn_x2 when using aiter-mxfp4-moe (#10981)

Both aiter-backed MXFP4 MoE paths now view the packed weights as torch.float4_e2m1fn_x2 (when the running PyTorch build provides that dtype) before calling aiter's fused_moe.

Co-authored-by: wunhuang <wunhuang@amd.com>
parent 82605747
@@ -843,10 +843,18 @@ class Mxfp4DynamicQuantMoEMethod(FusedMoEMethodBase):
         topk_weights = topk_weights.to(
             torch.float32
         )  # aiter's moe_sorting requires topk_weights to be FP32
+        if hasattr(torch, "float4_e2m1fn_x2"):
+            w13_weight = layer.w13_weight.view(torch.float4_e2m1fn_x2)
+            w2_weight = layer.w2_weight.view(torch.float4_e2m1fn_x2)
+        else:
+            w13_weight = layer.w13_weight
+            w2_weight = layer.w2_weight
         output = fused_moe(
             x,
-            layer.w13_weight,
-            layer.w2_weight,
+            w13_weight,
+            w2_weight,
             topk_weights,
             topk_ids,
             quant_type=QuantType.per_1x32,
@@ -183,10 +183,17 @@ class QuarkW4A4MXFp4MoEMethod(QuarkMoEMethod):
         moe_runner_config = self.moe_runner_config
         topk_weights, topk_ids, _ = topk_output
+        if hasattr(torch, "float4_e2m1fn_x2"):
+            w13_weight = layer.w13_weight.view(torch.float4_e2m1fn_x2)
+            w2_weight = layer.w2_weight.view(torch.float4_e2m1fn_x2)
+        else:
+            w13_weight = layer.w13_weight
+            w2_weight = layer.w2_weight
         output = fused_moe(
             x,
-            layer.w13_weight,
-            layer.w2_weight,
+            w13_weight,
+            w2_weight,
             topk_weights,
             topk_ids,
             quant_type=QuantType.per_1x32,
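For context, here is a minimal standalone sketch of the hasattr-guarded dtype view that both hunks introduce. It is not part of the patch; the tensor name `packed` and its shape are illustrative assumptions. `torch.float4_e2m1fn_x2` only exists in recent PyTorch builds, which is why the guard is needed.

import torch

# Stand-in for a packed MXFP4 weight: two E2M1 (FP4) values per uint8 byte.
packed = torch.randint(0, 256, (128, 64), dtype=torch.uint8)

if hasattr(torch, "float4_e2m1fn_x2"):
    # Newer PyTorch builds expose the packed-FP4 dtype; view() reinterprets
    # the existing bytes in place without copying or converting any data.
    weight = packed.view(torch.float4_e2m1fn_x2)
else:
    # Older builds: fall back to passing the raw packed bytes through.
    weight = packed

print(weight.dtype, weight.shape)

Because the dtype has the same one-byte item size as uint8, the view changes only how downstream consumers (here, aiter's fused_moe) interpret the buffer, not its shape or contents.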