[hotfix] Fix deepep w4a8 bug (#12642)

42889acb · Baizhou Zhang · GitHub · 211f4070 · 42889acb
Unverified commit 42889acb, authored Nov 04, 2025 by Baizhou Zhang; committed by GitHub on Nov 04, 2025.
Hide whitespace changes
Inline Side-by-side

Showing with 3 additions and 3 deletions

python/sglang/srt/layers/moe/token_dispatcher/deepep.py +3 -3

No files found.
--- a/python/sglang/srt/layers/moe/token_dispatcher/deepep.py
+++ b/python/sglang/srt/layers/moe/token_dispatcher/deepep.py
@@ -357,9 +357,9 @@ class _DeepEPDispatcherImplNormal(_DeepEPDispatcherImplBase):
    ):
        topk_weights, topk_ids = topk_output.topk_weights, topk_output.topk_ids
        topk_ids = topk_ids.to(torch.int64)
-        if deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM and not (
+        if (
-            get_moe_runner_backend().is_cutlass()
+            deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM
-            and self.quant_config.get_name() == "w4afp8"
+            and not get_moe_runner_backend().is_cutlass()
        ):
            # TODO hard code 128 block quant,use fp8 communication
            hidden_states = sglang_per_token_group_quant_fp8(