Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
10f535c0
Unverified
Commit
10f535c0
authored
Aug 21, 2025
by
Ming Yang
Committed by
GitHub
Aug 21, 2025
Browse files
[Bugfix] Fix port conflict by obtaining a list of open ports upfront (#21894)
Signed-off-by:
Ming Yang
<
minos.future@gmail.com
>
parent
48bfb0c9
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
35 additions
and
13 deletions
+35
-13
vllm/config/parallel.py
vllm/config/parallel.py
+18
-6
vllm/utils/__init__.py
vllm/utils/__init__.py
+14
-6
vllm/v1/engine/utils.py
vllm/v1/engine/utils.py
+3
-1
No files found.
vllm/config/parallel.py
View file @
10f535c0
...
@@ -15,7 +15,7 @@ import vllm.envs as envs
...
@@ -15,7 +15,7 @@ import vllm.envs as envs
from
vllm.config.utils
import
config
from
vllm.config.utils
import
config
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.platforms
import
current_platform
from
vllm.platforms
import
current_platform
from
vllm.utils
import
cuda_device_count_stateless
,
get_open_port
from
vllm.utils
import
cuda_device_count_stateless
,
get_open_port
s_list
if
TYPE_CHECKING
:
if
TYPE_CHECKING
:
from
ray.runtime_env
import
RuntimeEnv
from
ray.runtime_env
import
RuntimeEnv
...
@@ -171,6 +171,11 @@ class ParallelConfig:
...
@@ -171,6 +171,11 @@ class ParallelConfig:
rank
:
int
=
0
rank
:
int
=
0
"""Global rank in distributed setup."""
"""Global rank in distributed setup."""
_data_parallel_master_port_list
:
list
[
int
]
=
field
(
default_factory
=
list
)
"""List of open port auto-queried for data parallel messaging.
Set to be private as it's not intended to be configured by users.
"""
@
property
@
property
def
world_size_across_dp
(
self
)
->
int
:
def
world_size_across_dp
(
self
)
->
int
:
"""world_size_across_dp is TPxPPxDP, it is the size of the world
"""world_size_across_dp is TPxPPxDP, it is the size of the world
...
@@ -183,11 +188,15 @@ class ParallelConfig:
...
@@ -183,11 +188,15 @@ class ParallelConfig:
processes that is related to data parallelism,
processes that is related to data parallelism,
e.g. both in the worker and in the engine, which
e.g. both in the worker and in the engine, which
can live in different processes. To avoid port conflicts, we
can live in different processes. To avoid port conflicts, we
increment the port number
each time we need to
initialize a
pop a new port from the prepared port list
each time we need to
new process group related to data parallelism.
initialize a
new process group related to data parallelism.
"""
"""
if
self
.
_data_parallel_master_port_list
:
answer
=
self
.
_data_parallel_master_port_list
.
pop
()
else
:
answer
=
self
.
data_parallel_master_port
answer
=
self
.
data_parallel_master_port
self
.
data_parallel_master_port
+=
1
self
.
data_parallel_master_port
+=
1
return
answer
return
answer
def
stateless_init_dp_group
(
self
)
->
ProcessGroup
:
def
stateless_init_dp_group
(
self
)
->
ProcessGroup
:
...
@@ -313,7 +322,10 @@ class ParallelConfig:
...
@@ -313,7 +322,10 @@ class ParallelConfig:
if
self
.
data_parallel_size
>
1
or
self
.
data_parallel_size_local
==
0
:
if
self
.
data_parallel_size
>
1
or
self
.
data_parallel_size_local
==
0
:
# Data parallel was specified in the engine args.
# Data parallel was specified in the engine args.
self
.
data_parallel_master_port
=
get_open_port
()
if
not
self
.
_data_parallel_master_port_list
:
self
.
_data_parallel_master_port_list
=
get_open_ports_list
(
5
)
self
.
data_parallel_master_port
=
\
self
.
_data_parallel_master_port_list
.
pop
()
if
not
(
0
<=
self
.
data_parallel_rank
<
self
.
data_parallel_size
):
if
not
(
0
<=
self
.
data_parallel_rank
<
self
.
data_parallel_size
):
raise
ValueError
(
raise
ValueError
(
...
...
vllm/utils/__init__.py
View file @
10f535c0
...
@@ -940,6 +940,14 @@ def get_open_port() -> int:
...
@@ -940,6 +940,14 @@ def get_open_port() -> int:
return
_get_open_port
()
return
_get_open_port
()
def
get_open_ports_list
(
count
:
int
=
5
)
->
list
[
int
]:
"""Get a list of open ports."""
ports
=
set
()
while
len
(
ports
)
<
count
:
ports
.
add
(
get_open_port
())
return
list
(
ports
)
def
_get_open_port
()
->
int
:
def
_get_open_port
()
->
int
:
port
=
envs
.
VLLM_PORT
port
=
envs
.
VLLM_PORT
if
port
is
not
None
:
if
port
is
not
None
:
...
...
vllm/v1/engine/utils.py
View file @
10f535c0
...
@@ -71,7 +71,7 @@ class EngineHandshakeMetadata:
...
@@ -71,7 +71,7 @@ class EngineHandshakeMetadata:
connect to.
connect to.
"""
"""
addresses
:
EngineZmqAddresses
addresses
:
EngineZmqAddresses
parallel_config
:
dict
[
str
,
Union
[
int
,
str
]]
parallel_config
:
dict
[
str
,
Union
[
int
,
str
,
list
[
int
]
]]
class
CoreEngineProcManager
:
class
CoreEngineProcManager
:
...
@@ -798,6 +798,8 @@ def wait_for_engine_startup(
...
@@ -798,6 +798,8 @@ def wait_for_engine_startup(
parallel_config
.
data_parallel_master_ip
,
parallel_config
.
data_parallel_master_ip
,
"data_parallel_master_port"
:
"data_parallel_master_port"
:
parallel_config
.
data_parallel_master_port
,
parallel_config
.
data_parallel_master_port
,
"_data_parallel_master_port_list"
:
parallel_config
.
_data_parallel_master_port_list
,
"data_parallel_size"
:
"data_parallel_size"
:
parallel_config
.
data_parallel_size
,
parallel_config
.
data_parallel_size
,
}))
}))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment