Unverified commit bdb903bb, authored by Wentao Ye, committed by GitHub
Browse files

[Bug] Fix FlashInfer MNNVL socket collisions under concurrent vLLM jobs (#36674)


Signed-off-by: yewentao256 <zhyanwentao@126.com>
parent 68f783a7
...@@ -3,6 +3,8 @@ ...@@ -3,6 +3,8 @@
import atexit import atexit
import os
import random
import threading import threading
import torch import torch
...@@ -67,6 +69,9 @@ def initialize_fi_ar_workspace( ...@@ -67,6 +69,9 @@ def initialize_fi_ar_workspace(
backend = envs.VLLM_FLASHINFER_ALLREDUCE_BACKEND backend = envs.VLLM_FLASHINFER_ALLREDUCE_BACKEND
comm_backend = TorchDistBackend(group=group) comm_backend = TorchDistBackend(group=group)
rng_state = random.getstate()
try:
random.seed(int.from_bytes(os.urandom(16), byteorder="big"))
_fi_ar_workspace = flashinfer_comm.create_allreduce_fusion_workspace( _fi_ar_workspace = flashinfer_comm.create_allreduce_fusion_workspace(
backend=backend, backend=backend,
world_size=world_size, world_size=world_size,
...@@ -76,6 +81,8 @@ def initialize_fi_ar_workspace( ...@@ -76,6 +81,8 @@ def initialize_fi_ar_workspace(
dtype=dtype, dtype=dtype,
comm_backend=comm_backend, comm_backend=comm_backend,
) )
finally:
random.setstate(rng_state)
assert _fi_ar_workspace is not None assert _fi_ar_workspace is not None
logger.debug( logger.debug(
"Initialized FlashInfer All Reduce workspace: backend=%s, " "Initialized FlashInfer All Reduce workspace: backend=%s, "
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment