Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
a43a3f17
Unverified
Commit
a43a3f17
authored
Sep 04, 2025
by
Kebe
Committed by
GitHub
Sep 03, 2025
Browse files
[Bugfix][DP] DP distribution does not require ray[default] (#23822)
Signed-off-by:
Kebe
<
mail@kebe7jun.com
>
parent
6adaed42
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
10 additions
and
14 deletions
+10
-14
vllm/v1/engine/utils.py
vllm/v1/engine/utils.py
+10
-14
No files found.
vllm/v1/engine/utils.py
View file @
a43a3f17
...
...
@@ -315,7 +315,6 @@ class CoreEngineActorManager:
import
ray
from
ray._private.state
import
available_resources_per_node
from
ray.util.state
import
list_nodes
logger
.
info
(
"Creating placement groups for data parallel"
)
dp_master_ip
=
\
...
...
@@ -324,31 +323,28 @@ class CoreEngineActorManager:
local_engine_count
=
\
vllm_config
.
parallel_config
.
data_parallel_size_local
nodes
=
sorted
(
list_nodes
(
filters
=
[(
"state"
,
"="
,
"ALIVE"
)]),
key
=
lambda
node
:
node
.
node_ip
!=
dp_master_ip
)
assert
nodes
[
0
].
node_ip
==
dp_master_ip
,
(
"The head node is missing or dead"
)
assert
len
(
nodes
)
==
1
or
nodes
[
1
].
node_ip
!=
dp_master_ip
,
(
"There can only be one head node"
)
available_resources
=
available_resources_per_node
()
world_size
=
vllm_config
.
parallel_config
.
world_size
placement_groups
:
list
[
PlacementGroup
]
=
[]
local_dp_ranks
:
list
[
int
]
=
[]
for
node
in
nodes
:
node_ip
=
node
.
node_ip
node_resources
=
available_resources
[
node
.
node_id
]
dp_master_ip_key
=
f
'node:
{
dp_master_ip
}
'
nodes
=
sorted
(
available_resources
.
values
(),
key
=
lambda
x
:
dp_master_ip_key
not
in
x
)
assert
len
(
nodes
)
>
0
,
(
"No nodes with resources found in Ray cluster."
)
assert
dp_master_ip_key
in
nodes
[
0
],
(
"The DP master node (ip: %s) is missing or dead"
,
dp_master_ip
)
for
node_resources
in
nodes
:
if
"GPU"
not
in
node_resources
:
continue
# For now, each DP rank can only be assigned to one node
# TODO(rui): support allocating a single DP rank
# to multiple nodes
available_engine_count
=
int
(
node_resources
[
"GPU"
])
//
world_size
if
node_ip
==
dp_master_ip
:
if
dp_master_ip
_key
in
node_resources
:
assert
available_engine_count
>=
local_engine_count
,
(
"Not enough resources to allocate DP ranks "
f
"on DP master node
{
node
_ip
}
"
)
f
"on DP master node
{
dp_master
_ip
}
"
)
for
i
in
range
(
local_engine_count
):
bundles
=
[{
"GPU"
:
1.0
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment