Unverified commit bdb903bb, authored by Wentao Ye and committed by GitHub
Browse files

[Bug] Fix FlashInfer MNNVL socket collisions under concurrent vLLM jobs (#36674)


Signed-off-by: yewentao256 <zhyanwentao@126.com>
parent 68f783a7
...@@ -3,6 +3,8 @@ ...@@ -3,6 +3,8 @@
import atexit import atexit
import os
import random
import threading import threading
import torch import torch
...@@ -67,15 +69,20 @@ def initialize_fi_ar_workspace( ...@@ -67,15 +69,20 @@ def initialize_fi_ar_workspace(
backend = envs.VLLM_FLASHINFER_ALLREDUCE_BACKEND backend = envs.VLLM_FLASHINFER_ALLREDUCE_BACKEND
comm_backend = TorchDistBackend(group=group) comm_backend = TorchDistBackend(group=group)
_fi_ar_workspace = flashinfer_comm.create_allreduce_fusion_workspace( rng_state = random.getstate()
backend=backend, try:
world_size=world_size, random.seed(int.from_bytes(os.urandom(16), byteorder="big"))
rank=rank, _fi_ar_workspace = flashinfer_comm.create_allreduce_fusion_workspace(
max_token_num=max_token_num, backend=backend,
hidden_dim=hidden_dim, world_size=world_size,
dtype=dtype, rank=rank,
comm_backend=comm_backend, max_token_num=max_token_num,
) hidden_dim=hidden_dim,
dtype=dtype,
comm_backend=comm_backend,
)
finally:
random.setstate(rng_state)
assert _fi_ar_workspace is not None assert _fi_ar_workspace is not None
logger.debug( logger.debug(
"Initialized FlashInfer All Reduce workspace: backend=%s, " "Initialized FlashInfer All Reduce workspace: backend=%s, "
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment