Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
1e3e4a0c
Unverified
Commit
1e3e4a0c
authored
Jul 16, 2025
by
Alec
Committed by
GitHub
Jul 16, 2025
Browse files
fix: port race condition through deterministic ports (#1937)
parent
4ad281f2
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
162 additions
and
85 deletions
+162
-85
examples/vllm/components/args.py
examples/vllm/components/args.py
+155
-50
examples/vllm/components/main.py
examples/vllm/components/main.py
+5
-31
examples/vllm/launch/dep.sh
examples/vllm/launch/dep.sh
+1
-2
examples/vllm/launch/dsr1_dep.sh
examples/vllm/launch/dsr1_dep.sh
+1
-2
No files found.
examples/vllm/components/args.py
View file @
1e3e4a0c
...
...
@@ -13,9 +13,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
asyncio
import
json
import
logging
import
os
import
socket
import
sys
import
time
from
typing
import
Optional
from
vllm.config
import
KVTransferConfig
...
...
@@ -30,14 +34,6 @@ DEFAULT_ENDPOINT = "dyn://dynamo.backend.generate"
DEFAULT_MODEL
=
"Qwen/Qwen3-0.6B"
def
find_free_port
()
->
int
:
"""Find a free port by binding to port 0."""
with
socket
.
socket
(
socket
.
AF_INET
,
socket
.
SOCK_STREAM
)
as
s
:
s
.
bind
((
""
,
0
))
port
=
s
.
getsockname
()[
1
]
return
port
class
Config
:
"""Command line parameters or defaults"""
...
...
@@ -45,8 +41,9 @@ class Config:
namespace
:
str
component
:
str
endpoint
:
str
kv_events_port
:
int
is_prefill_worker
:
bool
kv_port
:
Optional
[
int
]
=
None
side_channel_port
:
Optional
[
int
]
=
None
# mirror vLLM
model
:
str
...
...
@@ -56,38 +53,6 @@ class Config:
engine_args
:
AsyncEngineArgs
def
overwrite_args
(
config
):
defaults
=
{
"task"
:
"generate"
,
"skip_tokenizer_init"
:
True
,
"disable_log_requests"
:
True
,
"enable_prefix_caching"
:
True
,
# KV routing relies on logging KV metrics
"disable_log_stats"
:
False
,
# Always set up KV Events for routing
"kv_events_config"
:
KVEventsConfig
(
enable_kv_cache_events
=
True
,
publisher
=
"zmq"
,
endpoint
=
f
"tcp://*:
{
config
.
kv_events_port
}
"
,
),
# Always setting up kv transfer for disagg
"kv_transfer_config"
:
KVTransferConfig
(
kv_connector
=
"NixlConnector"
,
kv_role
=
"kv_both"
),
}
# Made decision to always overwrite.
# Respecting users original cmd line args at all costs requires a bunch of arg parse work
logger
.
debug
(
"Setting Dynamo defaults for vLLM"
)
for
key
,
value
in
defaults
.
items
():
if
hasattr
(
config
.
engine_args
,
key
):
setattr
(
config
.
engine_args
,
key
,
value
)
logger
.
debug
(
f
" engine_args.
{
key
}
=
{
value
}
"
)
else
:
raise
ValueError
(
f
"
{
key
}
not found in AsyncEngineArgs from vLLM."
)
def
parse_args
()
->
Config
:
parser
=
FlexibleArgumentParser
(
description
=
"vLLM server integrated with Dynamo LLM."
...
...
@@ -103,12 +68,6 @@ def parse_args() -> Config:
action
=
"store_true"
,
help
=
"Enable prefill functionality for this worker. Currently overwrites the --endpoint to be a specially chosen dyn://dynamo.prefill.generate"
,
)
parser
.
add_argument
(
"--kv-events-port"
,
type
=
int
,
default
=
find_free_port
(),
help
=
"Endpoint where vLLM publishes metrics for dynamo. For DP, we handle the port iteration."
,
)
parser
=
AsyncEngineArgs
.
add_cli_args
(
parser
)
args
=
parser
.
parse_args
()
...
...
@@ -143,7 +102,6 @@ def parse_args() -> Config:
config
.
endpoint
=
parsed_endpoint_name
config
.
engine_args
=
engine_args
config
.
is_prefill_worker
=
args
.
is_prefill_worker
config
.
kv_events_port
=
args
.
kv_events_port
if
config
.
engine_args
.
block_size
is
None
:
config
.
engine_args
.
block_size
=
16
...
...
@@ -151,6 +109,153 @@ def parse_args() -> Config:
f
"Setting reasonable default of
{
config
.
engine_args
.
block_size
}
for block_size"
)
overwrite_args
(
config
)
return
config
async
def
allocate_and_reserve_port
(
namespace
,
etcd_client
,
worker_id
:
str
,
reason
:
str
,
max_attempts
:
int
=
100
,
)
->
int
:
"""
Get an OS-assigned port and atomically reserve it in ETCD.
Retries until successful or max_attempts reached.
Args:
max_attempts: Maximum number of ports to try (default: 100)
Raises:
RuntimeError: If unable to reserve a port within max_attempts
OSError: If unable to create sockets (system resource issues)
"""
node_name
=
socket
.
gethostname
()
for
attempt
in
range
(
1
,
max_attempts
+
1
):
# Hold socket open just long enough to reserve in ETCD
with
socket
.
socket
(
socket
.
AF_INET
,
socket
.
SOCK_STREAM
)
as
sock
:
sock
.
setsockopt
(
socket
.
SOL_SOCKET
,
socket
.
SO_REUSEADDR
,
1
)
sock
.
bind
((
""
,
0
))
port
=
sock
.
getsockname
()[
1
]
# Reserve in ETCD while holding the socket
key
=
f
"dyn://
{
namespace
}
/ports/
{
node_name
}
/
{
port
}
"
value
=
{
"worker_id"
:
worker_id
,
"reason"
:
reason
,
"reserved_at"
:
time
.
time
(),
"pid"
:
os
.
getpid
(),
}
try
:
await
etcd_client
.
kv_create
(
key
=
key
,
value
=
json
.
dumps
(
value
).
encode
(),
lease_id
=
etcd_client
.
primary_lease_id
(),
)
logger
.
debug
(
f
"Reserved OS-assigned port
{
port
}
for
{
worker_id
}
"
)
return
port
except
Exception
as
e
:
logger
.
debug
(
f
"Port
{
port
}
on
{
node_name
}
was already reserved (attempt
{
attempt
}
):
{
e
}
"
)
if
attempt
<
max_attempts
:
await
asyncio
.
sleep
(
0.01
)
raise
RuntimeError
(
f
"Failed to allocate and reserve a port after
{
max_attempts
}
attempts"
)
async
def
configure_ports_with_etcd
(
config
:
Config
,
etcd_client
):
"""Configure all settings that require ETCD, including port allocation and vLLM overrides."""
# First, allocate ports
dp_rank
=
config
.
engine_args
.
data_parallel_rank
or
0
worker_id
=
f
"vllm-
{
config
.
component
}
-dp
{
dp_rank
}
"
# Allocate KV events port
kv_port
=
await
allocate_and_reserve_port
(
namespace
=
config
.
namespace
,
etcd_client
=
etcd_client
,
worker_id
=
f
"
{
worker_id
}
"
,
reason
=
"zmq_kv_event_port"
,
)
# Allocate side channel port
side_channel_port
=
await
allocate_and_reserve_port
(
namespace
=
config
.
namespace
,
etcd_client
=
etcd_client
,
worker_id
=
f
"
{
worker_id
}
"
,
reason
=
"nixl_side_channel_port"
,
)
# Update config with allocated ports
config
.
kv_port
=
kv_port
config
.
side_channel_port
=
side_channel_port
def
overwrite_args
(
config
):
"""Set vLLM defaults for Dynamo."""
assert
(
config
.
kv_port
is
not
None
),
"Must set the kv_port, use configure_ports_with_etcd"
assert
(
config
.
side_channel_port
is
not
None
),
"Must set the kv_port, use configure_ports_with_etcd"
dp_rank
=
config
.
engine_args
.
data_parallel_rank
or
0
defaults
=
{
"task"
:
"generate"
,
"skip_tokenizer_init"
:
True
,
"disable_log_requests"
:
True
,
"enable_prefix_caching"
:
True
,
# KV routing relies on logging KV metrics
"disable_log_stats"
:
False
,
# Always setting up kv transfer for disagg
"kv_transfer_config"
:
KVTransferConfig
(
kv_connector
=
"NixlConnector"
,
kv_role
=
"kv_both"
),
"kv_events_config"
:
KVEventsConfig
(
enable_kv_cache_events
=
True
,
publisher
=
"zmq"
,
endpoint
=
f
"tcp://*:
{
config
.
kv_port
-
dp_rank
}
"
,
# vLLM will iterate dp_rank for us, so we need to subtract it out TODO: fix in vLLM
),
}
set_side_channel_host_and_port
(
config
)
logger
.
debug
(
"Setting Dynamo defaults for vLLM"
)
for
key
,
value
in
defaults
.
items
():
if
hasattr
(
config
.
engine_args
,
key
):
setattr
(
config
.
engine_args
,
key
,
value
)
logger
.
debug
(
f
" engine_args.
{
key
}
=
{
value
}
"
)
else
:
raise
ValueError
(
f
"
{
key
}
not found in AsyncEngineArgs from vLLM."
)
def
set_side_channel_host_and_port
(
config
:
Config
,
hostname
:
Optional
[
str
]
=
None
):
"""vLLM V1 NixlConnector creates a side channel to exchange metadata with other NIXL connectors.
This sets the port number for the side channel.
"""
if
hostname
is
None
:
hostname
=
socket
.
gethostname
()
# Test if hostname is usable by attempting to bind to it
try
:
with
socket
.
socket
(
socket
.
AF_INET
,
socket
.
SOCK_STREAM
)
as
test_socket
:
test_socket
.
bind
((
hostname
,
0
))
except
(
socket
.
error
,
socket
.
gaierror
):
# If hostname is not usable, fall back to localhost
logger
.
warning
(
f
"Hostname '
{
hostname
}
' is not usable, falling back to '127.0.0.1'"
)
hostname
=
"127.0.0.1"
os
.
environ
[
"VLLM_NIXL_SIDE_CHANNEL_HOST"
]
=
hostname
os
.
environ
[
"VLLM_NIXL_SIDE_CHANNEL_PORT"
]
=
str
(
config
.
side_channel_port
)
logger
.
debug
(
f
"Set NIXL side channel to
{
hostname
}
:
{
config
.
side_channel_port
}
"
)
examples/vllm/components/main.py
View file @
1e3e4a0c
...
...
@@ -17,11 +17,9 @@ import asyncio
import
logging
import
os
import
signal
import
socket
from
typing
import
Optional
import
uvloop
from
args
import
Config
,
find_free_port
,
parse_args
from
args
import
Config
,
configure_ports_with_etcd
,
overwrite_args
,
parse_args
from
handlers
import
DecodeWorkerHandler
,
PrefillWorkerHandler
from
publisher
import
StatLoggerFactory
from
vllm.distributed.kv_events
import
ZmqEventPublisher
...
...
@@ -57,6 +55,10 @@ async def graceful_shutdown(runtime):
async
def
worker
(
runtime
:
DistributedRuntime
):
config
=
parse_args
()
etcd_client
=
runtime
.
etcd_client
()
await
configure_ports_with_etcd
(
config
,
etcd_client
)
overwrite_args
(
config
)
# Set up signal handler for graceful shutdown
loop
=
asyncio
.
get_running_loop
()
...
...
@@ -78,8 +80,6 @@ def setup_vllm_engine(config, stat_logger=None):
os
.
environ
[
"VLLM_NO_USAGE_STATS"
]
=
"1"
# Avoid internal HTTP requests
os
.
environ
[
"VLLM_WORKER_MULTIPROC_METHOD"
]
=
"spawn"
set_side_channel_host_and_port
()
engine_args
=
config
.
engine_args
# Load default sampling params from `generation_config.json`
default_sampling_params
=
(
...
...
@@ -105,32 +105,6 @@ def setup_vllm_engine(config, stat_logger=None):
return
engine_client
,
vllm_config
,
default_sampling_params
def
set_side_channel_host_and_port
(
hostname
:
Optional
[
str
]
=
None
,
port
:
Optional
[
int
]
=
None
):
"""vLLM V1 NixlConnector creates a side channel to exchange metadata with other NIXL connectors.
This sets the port number for the side channel.
"""
if
hostname
is
None
:
hostname
=
socket
.
gethostname
()
# Test if hostname is usable by attempting to bind to it
try
:
with
socket
.
socket
(
socket
.
AF_INET
,
socket
.
SOCK_STREAM
)
as
test_socket
:
test_socket
.
bind
((
hostname
,
0
))
except
(
socket
.
error
,
socket
.
gaierror
):
# If hostname is not usable, fall back to localhost
logger
.
warning
(
f
"Hostname '
{
hostname
}
' is not usable, falling back to '127.0.0.1'"
)
hostname
=
"127.0.0.1"
if
port
is
None
:
port
=
find_free_port
()
logger
.
debug
(
"Setting VLLM_NIXL_SIDE_CHANNEL_HOST to %s"
,
hostname
)
os
.
environ
[
"VLLM_NIXL_SIDE_CHANNEL_HOST"
]
=
hostname
logger
.
debug
(
"Setting VLLM_NIXL_SIDE_CHANNEL_PORT to %s"
,
port
)
os
.
environ
[
"VLLM_NIXL_SIDE_CHANNEL_PORT"
]
=
str
(
port
)
async
def
init_prefill
(
runtime
:
DistributedRuntime
,
config
:
Config
):
"""
Instantiate and serve
...
...
examples/vllm/launch/dep.sh
View file @
1e3e4a0c
...
...
@@ -16,8 +16,7 @@ for i in {0..3}; do
--data-parallel-rank
$i
\
--data-parallel-size
4
\
--enable-expert-parallel
\
--enforce-eager
\
--kv-events-port
49500 &
--enforce-eager
&
done
echo
"All workers starting. (press Ctrl+C to stop)..."
...
...
examples/vllm/launch/dsr1_dep.sh
View file @
1e3e4a0c
...
...
@@ -98,8 +98,7 @@ for ((i=0; i<GPUS_PER_NODE; i++)); do
--data-parallel-address
$MASTER_ADDR
\
--data-parallel-rpc-port
13345
\
--gpu-memory-utilization
0.95
\
--enforce-eager
\
--kv-events-port
49700 2>&1 |
tee
$LOG_DIR
/dsr1_dep_
${
dp_rank
}
.log &
--enforce-eager
2>&1 |
tee
$LOG_DIR
/dsr1_dep_
${
dp_rank
}
.log &
done
echo
"All workers starting. (press Ctrl+C to stop)..."
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment