Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
233a1e9a
Unverified
Commit
233a1e9a
authored
Feb 13, 2026
by
Hyunjae Woo
Committed by
GitHub
Feb 13, 2026
Browse files
feat: Enable ModelExpress P2P weight transfer in Dynamo vLLM worker (#6186)
Signed-off-by:
Hyunjae Woo
<
hwoo@nvidia.com
>
parent
5624d144
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
122 additions
and
0 deletions
+122
-0
components/src/dynamo/vllm/args.py
components/src/dynamo/vllm/args.py
+9
-0
components/src/dynamo/vllm/backend_args.py
components/src/dynamo/vllm/backend_args.py
+13
-0
components/src/dynamo/vllm/main.py
components/src/dynamo/vllm/main.py
+16
-0
components/src/dynamo/vllm/tests/test_vllm_unit.py
components/src/dynamo/vllm/tests/test_vllm_unit.py
+69
-0
container/context.yaml
container/context.yaml
+2
-0
container/templates/args.Dockerfile
container/templates/args.Dockerfile
+4
-0
container/templates/vllm_runtime.Dockerfile
container/templates/vllm_runtime.Dockerfile
+9
-0
No files found.
components/src/dynamo/vllm/args.py
View file @
233a1e9a
...
...
@@ -85,6 +85,7 @@ def parse_args() -> Config:
parser
=
argparse
.
ArgumentParser
(
description
=
"Dynamo vLLM worker configuration"
,
formatter_class
=
argparse
.
RawTextHelpFormatter
,
allow_abbrev
=
False
,
)
# Build argument parser
...
...
@@ -215,6 +216,14 @@ def update_dynamo_config_with_engine(
)
dynamo_config
.
connector
=
normalized
# type: ignore[assignment]
# Validate ModelExpress P2P server URL
if
getattr
(
engine_config
,
"load_format"
,
None
)
in
(
"mx-source"
,
"mx-target"
):
if
not
dynamo_config
.
model_express_url
:
raise
ValueError
(
f
"--model-express-url or MODEL_EXPRESS_URL env var is required "
f
"when using --load-format=
{
engine_config
.
load_format
}
"
)
def
update_engine_config_with_dynamo
(
dynamo_config
:
Config
,
engine_config
:
AsyncEngineArgs
...
...
components/src/dynamo/vllm/backend_args.py
View file @
233a1e9a
...
...
@@ -189,6 +189,16 @@ class DynamoVllmArgGroup(ArgGroup):
help
=
"Path to vLLM-Omni stage configuration YAML file for --omni mode (optional)."
,
)
# ModelExpress P2P
add_argument
(
g
,
flag_name
=
"--model-express-url"
,
env_var
=
"MODEL_EXPRESS_URL"
,
default
=
None
,
help
=
"ModelExpress P2P server URL (e.g., http://mx-server:8080). "
"Required when using --load-format=mx-source or --load-format=mx-target."
,
)
# @dataclass()
class
DynamoVllmConfig
(
ConfigBase
):
...
...
@@ -221,6 +231,9 @@ class DynamoVllmConfig(ConfigBase):
omni
:
bool
stage_configs_path
:
Optional
[
str
]
=
None
# ModelExpress P2P
model_express_url
:
Optional
[
str
]
=
None
def
validate
(
self
)
->
None
:
"""Validate vLLM wrapper configuration."""
self
.
_validate_prefill_decode_exclusive
()
...
...
components/src/dynamo/vllm/main.py
View file @
233a1e9a
...
...
@@ -401,6 +401,22 @@ def setup_vllm_engine(config, stat_logger=None):
if
engine_args
.
load_format
==
"gms"
:
engine_args
.
worker_cls
=
"gpu_memory_service.integrations.vllm.worker.GMSWorker"
if
engine_args
.
load_format
in
(
"mx-source"
,
"mx-target"
):
try
:
from
modelexpress
import
register_modelexpress_loaders
# Ensure the ModelExpress server URL env var is set for the model loader
if
config
.
model_express_url
:
os
.
environ
[
"MODEL_EXPRESS_URL"
]
=
config
.
model_express_url
register_modelexpress_loaders
()
# Use wrapper worker to ensure loaders are registered in spawned worker processes
engine_args
.
worker_cls
=
"modelexpress.vllm_worker.ModelExpressWorker"
except
ImportError
as
e
:
raise
ImportError
(
f
"ModelExpress package required for --load-format=
{
engine_args
.
load_format
}
. "
"Install with: pip install modelexpress"
)
from
e
# Load default sampling params from `generation_config.json`
default_sampling_params
=
(
engine_args
.
create_model_config
().
get_diff_sampling_param
()
...
...
components/src/dynamo/vllm/tests/test_vllm_unit.py
View file @
233a1e9a
...
...
@@ -71,3 +71,72 @@ def test_custom_jinja_template_env_var_expansion(monkeypatch, mock_vllm_cli):
f
"Expected custom_jinja_template value to be
{
JINJA_TEMPLATE_PATH
}
, "
f
"got
{
config
.
custom_jinja_template
}
"
)
@pytest.mark.parametrize("load_format", ["mx-source", "mx-target"])
def test_model_express_url_from_cli_arg(mock_vllm_cli, load_format):
    """Check that an explicit --model-express-url lands on the parsed config.

    Runs once for each ModelExpress load format (mx-source and mx-target).
    """
    expected_url = "http://mx-server:8080"
    cli_args = [
        "--model",
        "Qwen/Qwen3-0.6B",
        "--load-format",
        load_format,
        "--model-express-url",
        expected_url,
    ]
    mock_vllm_cli(*cli_args)

    parsed = parse_args()

    assert parsed.model_express_url == expected_url
@pytest.mark.parametrize("load_format", ["mx-source", "mx-target"])
def test_model_express_url_from_env_var(monkeypatch, mock_vllm_cli, load_format):
    """Check that MODEL_EXPRESS_URL supplies the URL when the flag is omitted.

    Runs once for each ModelExpress load format (mx-source and mx-target).
    """
    env_url = "http://env-mx:9090"
    monkeypatch.setenv("MODEL_EXPRESS_URL", env_url)

    # No --model-express-url on the command line: only the env var is set.
    cli_args = ["--model", "Qwen/Qwen3-0.6B", "--load-format", load_format]
    mock_vllm_cli(*cli_args)

    parsed = parse_args()

    assert parsed.model_express_url == env_url
@pytest.mark.parametrize("load_format", ["mx-source", "mx-target"])
def test_model_express_url_cli_overrides_env(monkeypatch, mock_vllm_cli, load_format):
    """Check that --model-express-url wins over the MODEL_EXPRESS_URL env var.

    Runs once for each ModelExpress load format (mx-source and mx-target).
    """
    cli_url = "http://cli-mx:8080"

    # Both sources are populated with different values; the flag must win.
    monkeypatch.setenv("MODEL_EXPRESS_URL", "http://env-mx:9090")
    cli_args = [
        "--model",
        "Qwen/Qwen3-0.6B",
        "--load-format",
        load_format,
        "--model-express-url",
        cli_url,
    ]
    mock_vllm_cli(*cli_args)

    parsed = parse_args()

    assert parsed.model_express_url == cli_url
@pytest.mark.parametrize("load_format", ["mx-source", "mx-target"])
def test_model_express_url_missing_raises(monkeypatch, mock_vllm_cli, load_format):
    """Check that parsing fails with ValueError when no server URL is given.

    Runs once for each ModelExpress load format (mx-source and mx-target).
    The error message is expected to mention the offending --load-format value.
    """
    # Make sure no URL leaks in from the surrounding environment.
    monkeypatch.delenv("MODEL_EXPRESS_URL", raising=False)

    cli_args = ["--model", "Qwen/Qwen3-0.6B", "--load-format", load_format]
    mock_vllm_cli(*cli_args)

    # Escape the fragment so it is matched literally rather than as a regex.
    expected_fragment = re.escape(f"--load-format={load_format}")
    with pytest.raises(ValueError, match=expected_fragment):
        parse_args()
def test_model_express_url_none_for_default_load_format(monkeypatch, mock_vllm_cli):
    """Test that model_express_url is None when no URL is supplied.

    Neither --load-format nor --model-express-url is passed, so the option
    should keep its default of None.
    """
    # --model-express-url falls back to the MODEL_EXPRESS_URL env var, so a
    # value exported in the developer's shell or CI environment would make
    # this assertion fail. Clear it to keep the test hermetic.
    monkeypatch.delenv("MODEL_EXPRESS_URL", raising=False)

    mock_vllm_cli("--model", "Qwen/Qwen3-0.6B")

    config = parse_args()

    assert config.model_express_url is None
container/context.yaml
View file @
233a1e9a
...
...
@@ -45,6 +45,8 @@ vllm:
enable_media_ffmpeg
:
"
true"
enable_gpu_memory_service
:
"
true"
enable_kvbm
:
"
true"
enable_modelexpress_p2p
:
"
false"
modelexpress_ref
:
"
3d73992ce6c10e52ddc54f7f12af35d27e173f15"
sglang
:
base_image
:
nvcr.io/nvidia/cuda-dl-base
...
...
container/templates/args.Dockerfile
View file @
233a1e9a
...
...
@@ -85,6 +85,10 @@ ARG LMCACHE_REF={{ context.vllm.lmcache_ref }}
# If left blank, then we will fallback to vLLM defaults
ARG
DEEPGEMM_REF=""
# ModelExpress for P2P weight transfer (optional)
ARG
ENABLE_MODELEXPRESS_P2P={{ context.vllm.enable_modelexpress_p2p }}
ARG
MODELEXPRESS_REF={{ context.vllm.modelexpress_ref }}
{%- endif -%}
{% if framework == "trtllm" %}
...
...
container/templates/vllm_runtime.Dockerfile
View file @
233a1e9a
...
...
@@ -214,6 +214,15 @@ RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
# pip/uv bypasses umask when creating .egg-info files, but chmod -R is fast here (small directory)
chmod -R g+w /workspace/benchmarks
# Install ModelExpress for P2P weight transfer (optional)
ARG
ENABLE_MODELEXPRESS_P2P
ARG
MODELEXPRESS_REF
RUN if
[
"
${
ENABLE_MODELEXPRESS_P2P
}
"
=
"true"
]
;
then
\
echo
"Installing ModelExpress from ref:
${
MODELEXPRESS_REF
}
"
&&
\
uv pip
install
"modelexpress @ git+https://github.com/ai-dynamo/modelexpress.git@
${
MODELEXPRESS_REF
}
#subdirectory=modelexpress_client/python"
;
\
fi
# Install common and test dependencies. Cache uv downloads; uv handles its own locking for this cache.
RUN
--mount
=
type
=
bind
,source
=
./container/deps/requirements.txt,target
=
/tmp/requirements.txt
\
--mount
=
type
=
bind
,source
=
./container/deps/requirements.test.txt,target
=
/tmp/requirements.test.txt
\
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment