Fix bias handling in TritonMoeQuantInfo within quantization/mxfp4.py (#10579)

388c05d5 · yhyang201 · GitHub · fc809665 · 388c05d5
Unverified commit 388c05d5, authored Sep 19, 2025 by yhyang201; committed by GitHub on Sep 18, 2025.
Show whitespace changes
Inline Side-by-side

Showing with 2 additions and 2 deletions

python/sglang/srt/layers/quantization/mxfp4.py (+2 -2)

No files found.
--- a/python/sglang/srt/layers/quantization/mxfp4.py
+++ b/python/sglang/srt/layers/quantization/mxfp4.py
@@ -731,8 +731,8 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
            quant_info = TritonMoeQuantInfo(
                w13_weight=layer.w13_weight,
                w2_weight=layer.w2_weight,
-                w13_weight_bias=layer.w13_weight_bias,
+                b13=getattr(layer, "w13_weight_bias", None),
-                w2_weight_bias=layer.w2_weight_bias,
+                b2=getattr(layer, "w2_weight_bias", None),
            )
            return self.runner.run(dispatch_output, quant_info)