Unverified commit b532a5fd authored by xiaobochen, committed by GitHub

fix moe-ep accuracy issue for fp8 (#2489)

parent a0592c05
@@ -644,6 +644,10 @@ class Fp8EPMoEMethod(Fp8MoEMethod):
                         "QuantConfig has static quantization, but found "
                         "activation scales are None."
                     )
+            layer.w13_weight_scale = torch.nn.Parameter(
+                torch.max(layer.w13_weight_scale, dim=1).values,
+                requires_grad=False,
+            )
             return
 
     def apply(
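For reference, the added lines collapse the per-shard w13 weight scales into a single scale per expert by taking the maximum along the shard dimension; the larger scale is the conservative choice because it can represent the dynamic range of both shards. Below is a minimal sketch of the effect under an assumed [num_experts, 2] scale layout (one entry each for the gate and up projections). The shapes and example values are illustrative, not taken from the repository; only the torch.max(..., dim=1).values reduction mirrors the diff.

import torch

# Assumed layout: one fp8 weight scale per (expert, shard), where the two
# shards are the gate (w1) and up (w3) projections fused into w13.
w13_weight_scale = torch.tensor(
    [[0.02, 0.05],
     [0.01, 0.01],
     [0.10, 0.03],
     [0.04, 0.08]]
)

# Same reduction as the added code: keep the larger of the two shard scales
# for each expert, yielding a single per-expert scale vector.
per_expert_scale = torch.max(w13_weight_scale, dim=1).values
print(per_expert_scale)        # tensor([0.0500, 0.0100, 0.1000, 0.0800])
print(per_expert_scale.shape)  # torch.Size([4]) -- one scale per expert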