[Bug] Fix FlashInfer MNNVL socket collisions under concurrent vLLM jobs (#36674)

Signed-off-by: yewentao256 <zhyanwentao@126.com>

[Bug] Fix FlashInfer MNNVL socket collisions under concurrent vLLM jobs (#36674)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
bdb903bb · Wentao Ye · GitHub · 68f783a7 · bdb903bb
Unverified Commit bdb903bb authored Mar 17, 2026 by Wentao Ye Committed by GitHub Mar 17, 2026
Hide whitespace changes
Inline Side-by-side

Showing with 16 additions and 9 deletions

vllm/distributed/device_communicators/flashinfer_all_reduce.py +16 -9

No files found.
--- a/vllm/distributed/device_communicators/flashinfer_all_reduce.py
+++ b/vllm/distributed/device_communicators/flashinfer_all_reduce.py
@@ -3,6 +3,8 @@
 import atexit
+import os
+import random
 import threading
 import torch
@@ -67,15 +69,20 @@ def initialize_fi_ar_workspace(
     backend = envs.VLLM_FLASHINFER_ALLREDUCE_BACKEND
     comm_backend = TorchDistBackend(group=group)
-    _fi_ar_workspace = flashinfer_comm.create_allreduce_fusion_workspace(
-        backend=backend,
-        world_size=world_size,
-        rank=rank,
-        max_token_num=max_token_num,
-        hidden_dim=hidden_dim,
-        dtype=dtype,
-        comm_backend=comm_backend,
-    )
+    rng_state = random.getstate()
+    try:
+        random.seed(int.from_bytes(os.urandom(16), byteorder="big"))
+        _fi_ar_workspace = flashinfer_comm.create_allreduce_fusion_workspace(
+            backend=backend,
+            world_size=world_size,
+            rank=rank,
+            max_token_num=max_token_num,
+            hidden_dim=hidden_dim,
+            dtype=dtype,
+            comm_backend=comm_backend,
+        )
+    finally:
+        random.setstate(rng_state)
     assert _fi_ar_workspace is not None
     logger.debug(
         "Initialized FlashInfer All Reduce workspace: backend=%s, "