[Bugfix] Gracefully disable AllReduceFusionPass on GPUs without multicast support (#35085)

Signed-off-by: haosdent <haosdent@gmail.com>

[Bugfix] Gracefully disable AllReduceFusionPass on GPUs without multicast support (#35085)
Signed-off-by: haosdent <haosdent@gmail.com>
0788ff0a · haosdent · GitHub · d72b0be3 · 0788ff0a
Unverified commit 0788ff0a, authored Feb 25, 2026 by haosdent; committed by GitHub Feb 25, 2026.
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 20 additions and 8 deletions

vllm/compilation/passes/fusion/allreduce_rms_fusion.py (+20 -8)

No files found.
--- a/vllm/compilation/passes/fusion/allreduce_rms_fusion.py
+++ b/vllm/compilation/passes/fusion/allreduce_rms_fusion.py
@@ -729,6 +729,7 @@ class AllReduceFusionPass(VllmPatternMatcherPass):
            scope="global",
        )
+        try:
            self.workspace = flashinfer_comm.create_allreduce_fusion_workspace(
                backend="trtllm",
                world_size=self.tp_size,
@@ -737,6 +738,17 @@ class AllReduceFusionPass(VllmPatternMatcherPass):
                hidden_dim=self.hidden_dim,
                dtype=self.model_dtype,
            )
+        except RuntimeError as e:
+            if "multicast" not in str(e).lower():
+                raise
+            logger.warning_once(
+                "AllReduce fusion pass is disabled: flashinfer workspace "
+                "creation failed: %s. This is expected on GPUs without "
+                "NVSwitch (e.g., NVLink bridge-only or PCIe topologies). "
+                "Falling back to non-fused allreduce.",
+                str(e),
+            )
+            return
        global _FI_WORKSPACE
        _FI_WORKSPACE = self.workspace