Support both GLM-4.5 and GLM-4.5-Air (#8804)

a4b0d5c9 · Yuxuan Zhang · GitHub · 40e3b2be · a4b0d5c9
Unverified commit a4b0d5c9, authored Aug 05, 2025 by Yuxuan Zhang; committed by GitHub on Aug 05, 2025
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 1 addition and 2 deletions

python/sglang/srt/models/glm4_moe.py python/sglang/srt/models/glm4_moe.py +1 -2

No files found.
--- a/python/sglang/srt/models/glm4_moe.py
+++ b/python/sglang/srt/models/glm4_moe.py
@@ -785,7 +785,7 @@ class Glm4MoeForCausalLM(DeepseekV2ForCausalLM):
        )

    def determine_num_fused_shared_experts(
-        self, architecture: str = "DeepseekV3ForCausalLM"
+        self, architecture: str = "Glm4MoeForCausalLM"
    ):
        self.num_fused_shared_experts = 0
        if global_server_args_dict["disable_shared_experts_fusion"]:
@@ -797,7 +797,6 @@ class Glm4MoeForCausalLM(DeepseekV2ForCausalLM):
            not _is_cuda
            or torch.cuda.get_device_capability("cuda") < (8, 0)
            or self.config.architectures[0] != architecture
-            or self.config.n_routed_experts != 128
            or self.config.n_shared_experts != 1
        ):
            disable_reason = "Only GLM-4.5 on NV-platform with capability >= 80 can use shared experts fusion optimization."