Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
19447d66
Unverified
Commit
19447d66
authored
Mar 30, 2026
by
Ayush Agarwal
Committed by
GitHub
Mar 30, 2026
Browse files
chore: bump to vllm_omni 0.18.0 (#7685)
Signed-off-by:
ayushag
<
ayushag@nvidia.com
>
parent
db54ca2f
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
99 additions
and
20 deletions
+99
-20
components/src/dynamo/vllm/main.py
components/src/dynamo/vllm/main.py
+5
-2
components/src/dynamo/vllm/omni/args.py
components/src/dynamo/vllm/omni/args.py
+5
-5
components/src/dynamo/vllm/tests/omni/test_omni_args.py
components/src/dynamo/vllm/tests/omni/test_omni_args.py
+35
-1
container/context.yaml
container/context.yaml
+1
-1
examples/backends/vllm/launch/agg_omni_i2v.sh
examples/backends/vllm/launch/agg_omni_i2v.sh
+25
-7
examples/backends/vllm/launch/agg_omni_image.sh
examples/backends/vllm/launch/agg_omni_image.sh
+11
-1
examples/backends/vllm/launch/agg_omni_video.sh
examples/backends/vllm/launch/agg_omni_video.sh
+15
-1
pyproject.toml
pyproject.toml
+2
-2
No files found.
components/src/dynamo/vllm/main.py
View file @
19447d66
...
@@ -7,7 +7,10 @@ import logging
...
@@ -7,7 +7,10 @@ import logging
import
os
import
os
import
tempfile
import
tempfile
import
time
import
time
from
typing
import
Any
,
Optional
from
typing
import
TYPE_CHECKING
,
Any
,
Optional
if
TYPE_CHECKING
:
from
dynamo.vllm.omni.args
import
OmniConfig
import
uvloop
import
uvloop
from
prometheus_client
import
REGISTRY
,
CollectorRegistry
,
multiprocess
from
prometheus_client
import
REGISTRY
,
CollectorRegistry
,
multiprocess
...
@@ -183,7 +186,7 @@ async def worker() -> None:
...
@@ -183,7 +186,7 @@ async def worker() -> None:
def
setup_metrics_collection
(
def
setup_metrics_collection
(
config
:
Config
,
generate_endpoint
:
Endpoint
,
logger
:
logging
.
Logger
config
:
"
Config
| OmniConfig"
,
generate_endpoint
:
Endpoint
,
logger
:
logging
.
Logger
)
->
None
:
)
->
None
:
"""Set up metrics collection for vLLM and LMCache metrics.
"""Set up metrics collection for vLLM and LMCache metrics.
...
...
components/src/dynamo/vllm/omni/args.py
View file @
19447d66
...
@@ -7,7 +7,7 @@ import argparse
...
@@ -7,7 +7,7 @@ import argparse
import
logging
import
logging
from
typing
import
Optional
from
typing
import
Optional
from
vllm_omni.engine.arg_utils
import
Async
OmniEngineArgs
from
vllm_omni.engine.arg_utils
import
OmniEngineArgs
try
:
try
:
from
vllm.utils
import
FlexibleArgumentParser
from
vllm.utils
import
FlexibleArgumentParser
...
@@ -27,7 +27,7 @@ logger = logging.getLogger(__name__)
...
@@ -27,7 +27,7 @@ logger = logging.getLogger(__name__)
class
OmniArgGroup
(
ArgGroup
):
class
OmniArgGroup
(
ArgGroup
):
"""Diffusion pipeline kwargs passed through to AsyncOmni() constructor.
"""Diffusion pipeline kwargs passed through to AsyncOmni() constructor.
These are NOT part of
Async
OmniEngineArgs (which handles vLLM engine-level
These are NOT part of OmniEngineArgs (which handles vLLM engine-level
args like model, tp, max_model_len). Instead they are direct constructor
args like model, tp, max_model_len). Instead they are direct constructor
kwargs for AsyncOmni and need Dynamo-side env-var (DYN_OMNI_*) support,
kwargs for AsyncOmni and need Dynamo-side env-var (DYN_OMNI_*) support,
so we define them here rather than relying on the upstream arg parser.
so we define them here rather than relying on the upstream arg parser.
...
@@ -197,7 +197,7 @@ class OmniConfig(DynamoRuntimeConfig):
...
@@ -197,7 +197,7 @@ class OmniConfig(DynamoRuntimeConfig):
served_model_name
:
Optional
[
str
]
=
None
served_model_name
:
Optional
[
str
]
=
None
# vLLM-Omni engine args
# vLLM-Omni engine args
engine_args
:
Async
OmniEngineArgs
engine_args
:
OmniEngineArgs
# OmniArgGroup fields (populated by from_cli_args)
# OmniArgGroup fields (populated by from_cli_args)
stage_configs_path
:
Optional
[
str
]
=
None
stage_configs_path
:
Optional
[
str
]
=
None
...
@@ -248,7 +248,7 @@ def parse_omni_args() -> OmniConfig:
...
@@ -248,7 +248,7 @@ def parse_omni_args() -> OmniConfig:
"vLLM-Omni Engine Options. Please refer to vLLM-Omni documentation for more details."
"vLLM-Omni Engine Options. Please refer to vLLM-Omni documentation for more details."
)
)
vllm_parser
=
FlexibleArgumentParser
(
add_help
=
False
)
vllm_parser
=
FlexibleArgumentParser
(
add_help
=
False
)
Async
OmniEngineArgs
.
add_cli_args
(
vllm_parser
,
async_args_only
=
False
)
OmniEngineArgs
.
add_cli_args
(
vllm_parser
)
for
action
in
vllm_parser
.
_actions
:
for
action
in
vllm_parser
.
_actions
:
if
not
action
.
option_strings
:
if
not
action
.
option_strings
:
...
@@ -265,7 +265,7 @@ def parse_omni_args() -> OmniConfig:
...
@@ -265,7 +265,7 @@ def parse_omni_args() -> OmniConfig:
vllm_args
=
vllm_parser
.
parse_args
(
unknown
)
vllm_args
=
vllm_parser
.
parse_args
(
unknown
)
config
.
model
=
vllm_args
.
model
config
.
model
=
vllm_args
.
model
engine_args
=
Async
OmniEngineArgs
.
from_cli_args
(
vllm_args
)
engine_args
=
OmniEngineArgs
.
from_cli_args
(
vllm_args
)
if
getattr
(
engine_args
,
"served_model_name"
,
None
)
is
not
None
:
if
getattr
(
engine_args
,
"served_model_name"
,
None
)
is
not
None
:
served
=
engine_args
.
served_model_name
served
=
engine_args
.
served_model_name
...
...
components/src/dynamo/vllm/tests/omni/test_omni_args.py
View file @
19447d66
...
@@ -22,7 +22,7 @@ pytestmark = [
...
@@ -22,7 +22,7 @@ pytestmark = [
def
_make_omni_config
(
**
overrides
)
->
OmniConfig
:
def
_make_omni_config
(
**
overrides
)
->
OmniConfig
:
"""Build a minimal OmniConfig with valid defaults, applying overrides."""
"""Build a minimal OmniConfig with valid defaults, applying overrides."""
defaults
=
{
defaults
:
dict
=
{
# DynamoRuntimeConfig fields
# DynamoRuntimeConfig fields
"namespace"
:
"dynamo"
,
"namespace"
:
"dynamo"
,
"component"
:
"backend"
,
"component"
:
"backend"
,
...
@@ -113,3 +113,37 @@ def test_omni_config_valid_boundary_ratio(ratio):
...
@@ -113,3 +113,37 @@ def test_omni_config_valid_boundary_ratio(ratio):
"""boundary_ratio within (0, 1] should pass."""
"""boundary_ratio within (0, 1] should pass."""
config
=
_make_omni_config
(
boundary_ratio
=
ratio
)
config
=
_make_omni_config
(
boundary_ratio
=
ratio
)
config
.
validate
()
# should not raise
config
.
validate
()
# should not raise
# --- vllm_omni API compatibility guards ---
# These tests catch regressions when vllm_omni is upgraded.
def
test_omni_engine_args_importable
():
"""vllm_omni.engine.arg_utils must export a usable engine args class."""
from
vllm_omni.engine.arg_utils
import
OmniEngineArgs
assert
hasattr
(
OmniEngineArgs
,
"add_cli_args"
)
assert
hasattr
(
OmniEngineArgs
,
"from_cli_args"
)
def
test_omni_engine_args_add_cli_args_no_extra_params
():
"""add_cli_args must accept a parser and no other required args."""
from
vllm_omni.engine.arg_utils
import
OmniEngineArgs
try
:
from
vllm.utils
import
FlexibleArgumentParser
except
ImportError
:
from
vllm.utils.argparse_utils
import
FlexibleArgumentParser
parser
=
FlexibleArgumentParser
(
add_help
=
False
)
OmniEngineArgs
.
add_cli_args
(
parser
)
def
test_omni_config_imports_cleanly
():
"""OmniConfig and parse_omni_args must be importable without error."""
from
dynamo.vllm.omni.args
import
OmniConfig
,
parse_omni_args
assert
OmniConfig
is
not
None
assert
callable
(
parse_omni_args
)
container/context.yaml
View file @
19447d66
...
@@ -61,7 +61,7 @@ vllm:
...
@@ -61,7 +61,7 @@ vllm:
vllm_ref
:
v0.16.0
vllm_ref
:
v0.16.0
flashinf_ref
:
v0.6.6
flashinf_ref
:
v0.6.6
lmcache_ref
:
0.4.2
lmcache_ref
:
0.4.2
vllm_omni_ref
:
"
v0.1
6
.0"
vllm_omni_ref
:
"
v0.1
8
.0"
max_jobs
:
"
10"
max_jobs
:
"
10"
enable_media_ffmpeg
:
"
false"
enable_media_ffmpeg
:
"
false"
enable_gpu_memory_service
:
"
true"
enable_gpu_memory_service
:
"
true"
...
...
examples/backends/vllm/launch/agg_omni_i2v.sh
View file @
19447d66
...
@@ -13,6 +13,8 @@
...
@@ -13,6 +13,8 @@
set
-e
set
-e
trap
'echo Cleaning up...; kill 0'
EXIT
trap
'echo Cleaning up...; kill 0'
EXIT
SCRIPT_DIR
=
"
$(
dirname
"
$(
readlink
-f
"
$0
"
)
"
)
"
source
"
$SCRIPT_DIR
/../../../common/launch_utils.sh"
MODEL
=
"Wan-AI/Wan2.2-TI2V-5B-Diffusers"
MODEL
=
"Wan-AI/Wan2.2-TI2V-5B-Diffusers"
...
@@ -35,13 +37,26 @@ while [[ $# -gt 0 ]]; do
...
@@ -35,13 +37,26 @@ while [[ $# -gt 0 ]]; do
esac
esac
done
done
echo
"=========================================="
HTTP_PORT
=
"
${
DYN_HTTP_PORT
:-
8000
}
"
echo
"Starting vLLM-Omni I2V Worker"
print_launch_banner
--no-curl
"Launching vLLM-Omni Image-to-Video (1 GPU)"
"
$MODEL
"
"
$HTTP_PORT
"
echo
"Model:
$MODEL
"
print_curl_footer
<<
CURL
echo
"=========================================="
curl -s http://localhost:
${
HTTP_PORT
}
/v1/videos
\\
-H 'Content-Type: application/json'
\\
-d '{
"model": "
${
MODEL
}
",
"prompt": "A bear sleeping",
"input_reference": "/tmp/input.png",
"size": "832x480",
"response_format": "url",
"nvext": {
"num_inference_steps": 40,
"num_frames": 33,
"guidance_scale": 1.0,
"boundary_ratio": 0.875
}
}' | jq
CURL
echo
"Starting frontend on port
${
DYN_HTTP_PORT
:-
8000
}
..."
python
-m
dynamo.frontend &
python
-m
dynamo.frontend &
FRONTEND_PID
=
$!
FRONTEND_PID
=
$!
...
@@ -53,4 +68,7 @@ DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT:-8081} \
...
@@ -53,4 +68,7 @@ DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT:-8081} \
--model
"
$MODEL
"
\
--model
"
$MODEL
"
\
--output-modalities
video
\
--output-modalities
video
\
--media-output-fs-url
file:///tmp/dynamo_media
\
--media-output-fs-url
file:///tmp/dynamo_media
\
"
${
EXTRA_ARGS
[@]
}
"
"
${
EXTRA_ARGS
[@]
}
"
&
# Exit on first worker failure; kill 0 in the EXIT trap tears down the rest
wait_any_exit
examples/backends/vllm/launch/agg_omni_image.sh
View file @
19447d66
...
@@ -26,7 +26,17 @@ while [[ $# -gt 0 ]]; do
...
@@ -26,7 +26,17 @@ while [[ $# -gt 0 ]]; do
done
done
HTTP_PORT
=
"
${
DYN_HTTP_PORT
:-
8000
}
"
HTTP_PORT
=
"
${
DYN_HTTP_PORT
:-
8000
}
"
print_launch_banner
"Launching vLLM-Omni Image Generation (1 GPU)"
"
$MODEL
"
"
$HTTP_PORT
"
print_launch_banner
--no-curl
"Launching vLLM-Omni Image Generation (1 GPU)"
"
$MODEL
"
"
$HTTP_PORT
"
print_curl_footer
<<
CURL
curl -s -X POST http://localhost:
${
HTTP_PORT
}
/v1/images/generations
\\
-H 'Content-Type: application/json'
\\
-d '{
"model": "
${
MODEL
}
",
"prompt": "A red apple on a white table",
"size": "512x512",
"num_inference_steps": 20
}' | jq
CURL
python
-m
dynamo.frontend &
python
-m
dynamo.frontend &
...
...
examples/backends/vllm/launch/agg_omni_video.sh
View file @
19447d66
...
@@ -26,7 +26,21 @@ while [[ $# -gt 0 ]]; do
...
@@ -26,7 +26,21 @@ while [[ $# -gt 0 ]]; do
done
done
HTTP_PORT
=
"
${
DYN_HTTP_PORT
:-
8000
}
"
HTTP_PORT
=
"
${
DYN_HTTP_PORT
:-
8000
}
"
print_launch_banner
"Launching vLLM-Omni Video Generation (1 GPU)"
"
$MODEL
"
"
$HTTP_PORT
"
print_launch_banner
--no-curl
"Launching vLLM-Omni Video Generation (1 GPU)"
"
$MODEL
"
"
$HTTP_PORT
"
print_curl_footer
<<
CURL
curl -s http://localhost:
${
HTTP_PORT
}
/v1/videos
\\
-H 'Content-Type: application/json'
\\
-d '{
"model": "
${
MODEL
}
",
"prompt": "Dog running on a beach",
"size": "832x480",
"response_format": "url",
"nvext": {
"num_inference_steps": 20,
"num_frames": 30
}
}' | jq
CURL
python
-m
dynamo.frontend &
python
-m
dynamo.frontend &
...
...
pyproject.toml
View file @
19447d66
...
@@ -51,10 +51,10 @@ vllm = [
...
@@ -51,10 +51,10 @@ vllm = [
"uvloop"
,
"uvloop"
,
"nixl[cu12]<=0.10.1"
,
"nixl[cu12]<=0.10.1"
,
"vllm[flashinfer,runai,otel]==0.18.0"
,
"vllm[flashinfer,runai,otel]==0.18.0"
,
# vllm-omni 0.1
6
.0 is now on PyPI; install only future rc builds from source in container builds
# vllm-omni 0.1
8
.0 is now on PyPI; install only future rc builds from source in container builds
# (see container/deps/vllm/install_vllm.sh). pip install ai-dynamo[vllm] will
# (see container/deps/vllm/install_vllm.sh). pip install ai-dynamo[vllm] will
# not include vllm-omni — install it separately from source if needed.
# not include vllm-omni — install it separately from source if needed.
"vllm-omni==0.1
6
.0"
,
"vllm-omni==0.1
8
.0"
,
"blake3>=1.0.0,<2.0.0"
,
"blake3>=1.0.0,<2.0.0"
,
]
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment