Unverified Commit c178abda authored by JieXin Liang, committed by GitHub

[fix] fix determine_n_share_experts_fusion (#6118)

Gate the shared experts fusion checks on `_is_cuda`: an explicit fusion request on a non-CUDA platform is now disabled with a log message instead of being accepted, and the auto-enable path checks `_is_cuda` before calling `torch.cuda.get_device_capability`, which can fail when no CUDA device is available.

parent b29a026e
@@ -1486,14 +1486,15 @@ class DeepseekV2ForCausalLM(nn.Module):
         if self.n_share_experts_fusion > 0:
             # Only Deepseek V3/R1 can use shared experts fusion optimization now.
             if (
-                self.config.architectures[0] != architecture
+                not _is_cuda
+                or self.config.architectures[0] != architecture
                 or self.config.n_routed_experts != 256
             ):
                 self.n_share_experts_fusion = 0
                 global_server_args_dict["n_share_experts_fusion"] = 0
                 log_info_on_rank0(
                     logger,
-                    "Only Deepseek V3/R1 can use shared experts fusion optimization. Shared experts fusion optimization is disabled.",
+                    "Only Deepseek V3/R1 on NVIDIA platforms can use the shared experts fusion optimization. Shared experts fusion optimization is disabled.",
                 )
             else:
                 assert (
@@ -1501,7 +1502,8 @@ class DeepseekV2ForCausalLM(nn.Module):
                 ), f"Shared experts fusion optimization is enabled in DeepSeek V3/R1; set it to {self.tp_size} to get the best optimized performance."
         elif self.n_share_experts_fusion == 0:
             if (
-                torch.cuda.get_device_capability("cuda") >= (9, 0)
+                _is_cuda
+                and torch.cuda.get_device_capability("cuda") >= (9, 0)
                 and self.config.architectures[0] == architecture
                 and self.config.n_routed_experts == 256
                 and (not global_server_args_dict["enable_deepep_moe"])
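For readers skimming the diff, here is a minimal self-contained sketch of the decision logic as it stands after this fix. It is not the repository's code: `has_cuda`, `device_capability`, the parameter names, and the `expected_arch` default are stand-ins for SGLang's `_is_cuda` flag, `torch.cuda.get_device_capability(...)`, and the model/server config fields, and the assumption that the auto-enable branch picks the TP size is inferred from the assert message in the diff above.

```python
# Hypothetical stand-alone sketch of the gating logic after this fix.
# Everything here is illustrative: the real code lives in
# DeepseekV2ForCausalLM and reads these values from self.config,
# global_server_args_dict, and the module-level _is_cuda flag.

def determine_n_share_experts_fusion_sketch(
    requested: int,            # user-requested n_share_experts_fusion
    has_cuda: bool,            # stand-in for SGLang's _is_cuda
    device_capability: tuple,  # e.g. (9, 0) for Hopper-class GPUs
    architecture: str,         # config.architectures[0]
    n_routed_experts: int,     # config.n_routed_experts
    tp_size: int,              # tensor-parallel world size
    enable_deepep_moe: bool,   # global_server_args_dict["enable_deepep_moe"]
    expected_arch: str = "DeepseekV3ForCausalLM",  # assumed architecture name
) -> int:
    """Return the effective number of shared experts to fuse."""
    if requested > 0:
        # Explicit request: honor it only for DeepSeek V3/R1 on CUDA;
        # otherwise disable the optimization. The `not has_cuda` check
        # is exactly what this commit adds to the first branch.
        if (
            not has_cuda
            or architecture != expected_arch
            or n_routed_experts != 256
        ):
            return 0
        return requested  # the real code also asserts requested == tp_size
    # No explicit request: auto-enable only on CUDA devices with compute
    # capability >= 9.0. The commit adds the `has_cuda` guard so the real
    # code never calls torch.cuda.get_device_capability off-platform.
    if (
        has_cuda
        and device_capability >= (9, 0)
        and architecture == expected_arch
        and n_routed_experts == 256
        and not enable_deepep_moe
    ):
        return tp_size  # assumed: the assert above suggests tp_size is optimal
    return 0


# Example: a fusion request on a non-CUDA build now degrades to 0
# instead of proceeding with an optimization the platform cannot run.
assert determine_n_share_experts_fusion_sketch(
    requested=8, has_cuda=False, device_capability=(0, 0),
    architecture="DeepseekV3ForCausalLM", n_routed_experts=256,
    tp_size=8, enable_deepep_moe=False,
) == 0
```

The design point of the fix is short-circuiting: by placing the platform check first in each condition, the CUDA-only probes are never evaluated on builds where they can raise or return misleading values.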