Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
9558f439
Unverified
Commit
9558f439
authored
Apr 26, 2026
by
Dao007forever
Committed by
GitHub
Apr 26, 2026
Browse files
[Bugfix] Size FlashInfer NVLink MNNVL workspace to EP group (#40893)
Signed-off-by: Dao Le <Dao007forever@gmail.com>
parent
8cd174fa
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing 1 changed file with 13 additions and 7 deletions
+13
-7
vllm/distributed/device_communicators/all2all.py
vllm/distributed/device_communicators/all2all.py
+13
-7
No files found.
vllm/distributed/device_communicators/all2all.py
View file @
9558f439
...
@@ -492,15 +492,18 @@ class FlashInferNVLinkTwoSidedManager(All2AllManagerBase):
...
@@ -492,15 +492,18 @@ class FlashInferNVLinkTwoSidedManager(All2AllManagerBase):
CustomCommunicator
,
CustomCommunicator
,
)
)
dp_config
=
MnnvlConfig
(
# MNNVL workspace is allocated per rank in the comm_backend's group; the
comm_backend
=
CustomCommunicator
(
get_dp_group
().
cpu_group
),
# flashinfer kernel asserts workspace.size(0) == moe_ep_size, so the backend
# must span the EP group (= DP*PCP*TP), not the DP group.
ep_config
=
MnnvlConfig
(
comm_backend
=
CustomCommunicator
(
self
.
cpu_group
),
fabric_page_size
=
1
<<
29
,
# 512MB
fabric_page_size
=
1
<<
29
,
# 512MB
allocation_granularity
=
0
,
# Auto-detect
allocation_granularity
=
0
,
# Auto-detect
)
)
self
.
workspace_tensor
=
MnnvlMoe
.
get_moe_workspaces
(
self
.
mapping
,
d
p_config
)
self
.
workspace_tensor
=
MnnvlMoe
.
get_moe_workspaces
(
self
.
mapping
,
e
p_config
)
self
.
prepare_workspace_tensor
=
MnnvlMoe
.
get_moe_prepare_workspace
(
self
.
prepare_workspace_tensor
=
MnnvlMoe
.
get_moe_prepare_workspace
(
self
.
mapping
,
d
p_config
self
.
mapping
,
e
p_config
)
)
self
.
world_size
=
world_size
self
.
world_size
=
world_size
...
@@ -605,8 +608,11 @@ class FlashInferNVLinkOneSidedManager(All2AllManagerBase):
...
@@ -605,8 +608,11 @@ class FlashInferNVLinkOneSidedManager(All2AllManagerBase):
CustomCommunicator
,
CustomCommunicator
,
)
)
dp_config
=
MnnvlConfig
(
# MNNVL workspace is allocated per rank in the comm_backend's group; the
comm_backend
=
CustomCommunicator
(
get_dp_group
().
cpu_group
),
# flashinfer kernel asserts workspace.size(0) == moe_ep_size, so the backend
# must span the EP group (= DP*PCP*TP), not the DP group.
ep_config
=
MnnvlConfig
(
comm_backend
=
CustomCommunicator
(
self
.
cpu_group
),
)
)
total_dispatch_payload_size_per_token
=
(
total_dispatch_payload_size_per_token
=
(
hidden_size
//
2
# nvfp4 hidden states
hidden_size
//
2
# nvfp4 hidden states
...
@@ -628,7 +634,7 @@ class FlashInferNVLinkOneSidedManager(All2AllManagerBase):
...
@@ -628,7 +634,7 @@ class FlashInferNVLinkOneSidedManager(All2AllManagerBase):
top_k
=
top_k
,
top_k
=
top_k
,
num_experts
=
num_experts
,
num_experts
=
num_experts
,
workspace_size_per_rank
=
self
.
workspace_size
,
workspace_size_per_rank
=
self
.
workspace_size
,
mnnvl_config
=
d
p_config
,
mnnvl_config
=
e
p_config
,
)
)
self
.
gpus_per_node
=
gpus_per_node
self
.
gpus_per_node
=
gpus_per_node
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment