[Bugfix] Fix use_ep condition so expert parallelism is enabled when tp_size * dp_size > 1 (#16161)

ad971af8 · zxfan-cpu · GitHub · f2ebb6f5 · ad971af8
Unverified Commit ad971af8 authored Apr 08, 2025 by zxfan-cpu Committed by GitHub Apr 07, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 1 addition and 1 deletion

vllm/model_executor/layers/fused_moe/layer.py vllm/model_executor/layers/fused_moe/layer.py +1 -1

No files found.
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -437,7 +437,7 @@ class FusedMoE(torch.nn.Module):
        # Use expert parallelism instead of tensor parallelism?
        vllm_config = get_current_vllm_config()
        use_ep = (vllm_config.parallel_config.enable_expert_parallel
-                  and self.tp_size > 1)
+                  and self.tp_size * self.dp_size > 1)
        # For smuggling this layer into the fused moe custom op
        self.use_direct_call = self.dp_size == 1