use fp32 for e_score_correction_bias in GLM-4.5 (#8729)

760286e3 · Yuxuan Zhang · GitHub · 3435a24e · 760286e3
Unverified commit 760286e3, authored Aug 04, 2025 by Yuxuan Zhang; committed by GitHub Aug 03, 2025.
Hide whitespace changes
Inline Side-by-side

Showing with 1 addition and 1 deletion

python/sglang/srt/models/glm4_moe.py → python/sglang/srt/models/glm4_moe.py (+1 -1)

No files found.
--- a/python/sglang/srt/models/glm4_moe.py
+++ b/python/sglang/srt/models/glm4_moe.py
@@ -343,7 +343,7 @@ class Glm4MoeGate(nn.Module):
            torch.empty((config.n_routed_experts, config.hidden_size))
        )
        self.e_score_correction_bias = nn.Parameter(
-            torch.empty((config.n_routed_experts))
+            torch.empty((config.n_routed_experts), dtype=torch.float32)
        )
        if _is_cpu and _is_cpu_amx_available:
            self.quant_method = PackWeightMethod(weight_names=["weight"])