[Feat] Enable eplb with default all2all backend (#30559)

Signed-off-by: yewentao256 <zhyanwentao@126.com>

[Feat] Enable eplb with default all2all backend (#30559)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
59bd5f6a · Wentao Ye · GitHub · 00a8d762 · 59bd5f6a
Unverified Commit 59bd5f6a authored Dec 16, 2025 by Wentao Ye Committed by GitHub Dec 16, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 4 additions and 4 deletions

vllm/model_executor/layers/fused_moe/shared_fused_moe.py vllm/model_executor/layers/fused_moe/shared_fused_moe.py +4 -4

No files found.
--- a/vllm/model_executor/layers/fused_moe/shared_fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/shared_fused_moe.py
@@ -29,14 +29,14 @@ class SharedFusedMoE(FusedMoE):
        self._shared_experts = shared_experts
        # Disable shared expert overlap if:
-        #   - we are using eplb, because of correctness issues
+        #   - we are using eplb with non-default backend, because of correctness issues
-        #   - we are using flashinfer with DP, since there nothing to gain
+        #   - we are using flashinfer with DP, since there is nothing to gain
        #   - we are using marlin kernels
+        backend = self.moe_parallel_config.all2all_backend
        self.use_overlapped = (
            use_overlapped
            and not (
-                # TODO(wentao): find the root cause and remove this condition
+                (self.enable_eplb and backend != "allgather_reducescatter")
-                self.enable_eplb
                or (self.moe_config.use_flashinfer_cutlass_kernels and self.dp_size > 1)
            )
            and self._shared_experts is not None