[BugFix] Fix routed_scaling_factor double mul for dots1 and glm4 MoE models (#24132)

Signed-off-by: Yong Hoon Shin <yhshin@meta.com>

[BugFix] Fix routed_scaling_factor double mul for dots1 and glm4 MoE models (#24132)
Signed-off-by: Yong Hoon Shin <yhshin@meta.com>
426cc862 · Yong Hoon Shin · GitHub · e81d4e69 · 426cc862 · 426cc862
Unverified commit 426cc862, authored Sep 02, 2025 by Yong Hoon Shin; committed by GitHub on Sep 03, 2025.
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 4 additions and 2 deletions

vllm/model_executor/models/dots1.py (+2 -1)

vllm/model_executor/models/glm4_moe.py (+2 -1)

No files found.
--- a/vllm/model_executor/models/dots1.py
+++ b/vllm/model_executor/models/dots1.py
@@ -137,7 +137,8 @@ class Dots1MoE(nn.Module):
            topk_group=config.topk_group,
            prefix=f"{prefix}.experts",
            scoring_func=config.scoring_func,
-            routed_scaling_factor=self.routed_scaling_factor,
+            # we do scaling outside, set factor to 1.0 to avoid double mul
+            routed_scaling_factor=1.0,
            e_score_correction_bias=self.gate.e_score_correction_bias)

        if config.n_shared_experts is not None:

--- a/vllm/model_executor/models/glm4_moe.py
+++ b/vllm/model_executor/models/glm4_moe.py
@@ -159,7 +159,8 @@ class Glm4MoE(nn.Module):
            topk_group=config.topk_group,
            prefix=f"{prefix}.experts",
            scoring_func="sigmoid",
-            routed_scaling_factor=self.routed_scaling_factor,
+            # we do scaling outside, set factor to 1.0 to avoid double mul
+            routed_scaling_factor=1.0,
            e_score_correction_bias=self.gate.e_score_correction_bias,
            enable_eplb=self.enable_eplb,
            num_redundant_experts=self.n_redundant_experts)