chore: bump to vllm_omni 0.18.0 (#7685)

Signed-off-by: ayushag <ayushag@nvidia.com>

chore: bump to vllm_omni 0.18.0 (#7685)
Signed-off-by: ayushag <ayushag@nvidia.com>
19447d66 · Ayush Agarwal · GitHub · db54ca2f · 19447d66 · 19447d66
Unverified commit 19447d66, authored Mar 30, 2026 by Ayush Agarwal; committed by GitHub Mar 30, 2026.
8 changed files
--- a/components/src/dynamo/vllm/main.py
+++ b/components/src/dynamo/vllm/main.py
@@ -7,7 +7,10 @@ import logging
 import os
 import tempfile
 import time
-from typing import Any, Optional
+from typing import TYPE_CHECKING, Any, Optional
+if TYPE_CHECKING:
+    from dynamo.vllm.omni.args import OmniConfig
 import uvloop
 from prometheus_client import REGISTRY, CollectorRegistry, multiprocess
@@ -183,7 +186,7 @@ async def worker() -> None:
 def setup_metrics_collection(
-    config: Config, generate_endpoint: Endpoint, logger: logging.Logger
+    config: "Config | OmniConfig", generate_endpoint: Endpoint, logger: logging.Logger
 ) -> None:
    """Set up metrics collection for vLLM and LMCache metrics.

--- a/components/src/dynamo/vllm/omni/args.py
+++ b/components/src/dynamo/vllm/omni/args.py
@@ -7,7 +7,7 @@ import argparse
 import logging
 from typing import Optional
-from vllm_omni.engine.arg_utils import AsyncOmniEngineArgs
+from vllm_omni.engine.arg_utils import OmniEngineArgs
 try:
    from vllm.utils import FlexibleArgumentParser
@@ -27,7 +27,7 @@ logger = logging.getLogger(__name__)
 class OmniArgGroup(ArgGroup):
    """Diffusion pipeline kwargs passed through to AsyncOmni() constructor.
-    These are NOT part of AsyncOmniEngineArgs (which handles vLLM engine-level
+    These are NOT part of OmniEngineArgs (which handles vLLM engine-level
    args like model, tp, max_model_len). Instead they are direct constructor
    kwargs for AsyncOmni and need Dynamo-side env-var (DYN_OMNI_*) support,
    so we define them here rather than relying on the upstream arg parser.
@@ -197,7 +197,7 @@ class OmniConfig(DynamoRuntimeConfig):
    served_model_name: Optional[str] = None
    # vLLM-Omni engine args
-    engine_args: AsyncOmniEngineArgs
+    engine_args: OmniEngineArgs
    # OmniArgGroup fields (populated by from_cli_args)
    stage_configs_path: Optional[str] = None
@@ -248,7 +248,7 @@ def parse_omni_args() -> OmniConfig:
        "vLLM-Omni Engine Options. Please refer to vLLM-Omni documentation for more details."
    )
    vllm_parser = FlexibleArgumentParser(add_help=False)
-    AsyncOmniEngineArgs.add_cli_args(vllm_parser, async_args_only=False)
+    OmniEngineArgs.add_cli_args(vllm_parser)
    for action in vllm_parser._actions:
        if not action.option_strings:
@@ -265,7 +265,7 @@ def parse_omni_args() -> OmniConfig:
    vllm_args = vllm_parser.parse_args(unknown)
    config.model = vllm_args.model
-    engine_args = AsyncOmniEngineArgs.from_cli_args(vllm_args)
+    engine_args = OmniEngineArgs.from_cli_args(vllm_args)
    if getattr(engine_args, "served_model_name", None) is not None:
        served = engine_args.served_model_name

--- a/components/src/dynamo/vllm/tests/omni/test_omni_args.py
+++ b/components/src/dynamo/vllm/tests/omni/test_omni_args.py
@@ -22,7 +22,7 @@ pytestmark = [
 def _make_omni_config(**overrides) -> OmniConfig:
    """Build a minimal OmniConfig with valid defaults, applying overrides."""
-    defaults = {
+    defaults: dict = {
        # DynamoRuntimeConfig fields
        "namespace": "dynamo",
        "component": "backend",
@@ -113,3 +113,37 @@ def test_omni_config_valid_boundary_ratio(ratio):
    """boundary_ratio within (0, 1] should pass."""
    config = _make_omni_config(boundary_ratio=ratio)
    config.validate()  # should not raise
+# --- vllm_omni API compatibility guards ---
+# These tests catch regressions when vllm_omni is upgraded.
+def test_omni_engine_args_importable():
+    """vllm_omni.engine.arg_utils must export a usable engine args class."""
+    from vllm_omni.engine.arg_utils import OmniEngineArgs
+    assert hasattr(OmniEngineArgs, "add_cli_args")
+    assert hasattr(OmniEngineArgs, "from_cli_args")
+def test_omni_engine_args_add_cli_args_no_extra_params():
+    """add_cli_args must accept a parser and no other required args."""
+    from vllm_omni.engine.arg_utils import OmniEngineArgs
+    try:
+        from vllm.utils import FlexibleArgumentParser
+    except ImportError:
+        from vllm.utils.argparse_utils import FlexibleArgumentParser
+    parser = FlexibleArgumentParser(add_help=False)
+    OmniEngineArgs.add_cli_args(parser)
+def test_omni_config_imports_cleanly():
+    """OmniConfig and parse_omni_args must be importable without error."""
+    from dynamo.vllm.omni.args import OmniConfig, parse_omni_args
+    assert OmniConfig is not None
+    assert callable(parse_omni_args)
--- a/container/context.yaml
+++ b/container/context.yaml
@@ -61,7 +61,7 @@ vllm:
    vllm_ref: v0.16.0
  flashinf_ref: v0.6.6
  lmcache_ref: 0.4.2
-  vllm_omni_ref: "v0.16.0"
+  vllm_omni_ref: "v0.18.0"
  max_jobs: "10"
  enable_media_ffmpeg: "false"
  enable_gpu_memory_service: "true"

--- a/examples/backends/vllm/launch/agg_omni_i2v.sh
+++ b/examples/backends/vllm/launch/agg_omni_i2v.sh
@@ -13,6 +13,8 @@
 set -e
 trap 'echo Cleaning up...; kill 0' EXIT
+SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
+source "$SCRIPT_DIR/../../../common/launch_utils.sh"
 MODEL="Wan-AI/Wan2.2-TI2V-5B-Diffusers"
@@ -35,13 +37,26 @@ while [[ $# -gt 0 ]]; do
    esac
 done
-echo "=========================================="
+HTTP_PORT="${DYN_HTTP_PORT:-8000}"
-echo "Starting vLLM-Omni I2V Worker"
+print_launch_banner --no-curl "Launching vLLM-Omni Image-to-Video (1 GPU)" "$MODEL" "$HTTP_PORT"
-echo "Model: $MODEL"
+print_curl_footer <<CURL
-echo "=========================================="
+curl -s http://localhost:${HTTP_PORT}/v1/videos \\
+  -H 'Content-Type: application/json' \\
+  -d '{
+    "model": "${MODEL}",
+    "prompt": "A bear sleeping",
+    "input_reference": "/tmp/input.png",
+    "size": "832x480",
+    "response_format": "url",
+    "nvext": {
+      "num_inference_steps": 40,
+      "num_frames": 33,
+      "guidance_scale": 1.0,
+      "boundary_ratio": 0.875
+    }
+  }' | jq
+CURL
-echo "Starting frontend on port ${DYN_HTTP_PORT:-8000}..."
 python -m dynamo.frontend &
 FRONTEND_PID=$!
@@ -53,4 +68,7 @@ DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT:-8081} \
    --model "$MODEL" \
    --output-modalities video \
    --media-output-fs-url file:///tmp/dynamo_media \
-    "${EXTRA_ARGS[@]}"
+    "${EXTRA_ARGS[@]}" &
+# Exit on first worker failure; kill 0 in the EXIT trap tears down the rest
+wait_any_exit
--- a/examples/backends/vllm/launch/agg_omni_image.sh
+++ b/examples/backends/vllm/launch/agg_omni_image.sh
@@ -26,7 +26,17 @@ while [[ $# -gt 0 ]]; do
 done
 HTTP_PORT="${DYN_HTTP_PORT:-8000}"
-print_launch_banner "Launching vLLM-Omni Image Generation (1 GPU)" "$MODEL" "$HTTP_PORT"
+print_launch_banner --no-curl "Launching vLLM-Omni Image Generation (1 GPU)" "$MODEL" "$HTTP_PORT"
+print_curl_footer <<CURL
+curl -s -X POST http://localhost:${HTTP_PORT}/v1/images/generations \\
+  -H 'Content-Type: application/json' \\
+  -d '{
+    "model": "${MODEL}",
+    "prompt": "A red apple on a white table",
+    "size": "512x512",
+    "num_inference_steps": 20
+  }' | jq
+CURL
 python -m dynamo.frontend &

--- a/examples/backends/vllm/launch/agg_omni_video.sh
+++ b/examples/backends/vllm/launch/agg_omni_video.sh
@@ -26,7 +26,21 @@ while [[ $# -gt 0 ]]; do
 done
 HTTP_PORT="${DYN_HTTP_PORT:-8000}"
-print_launch_banner "Launching vLLM-Omni Video Generation (1 GPU)" "$MODEL" "$HTTP_PORT"
+print_launch_banner --no-curl "Launching vLLM-Omni Video Generation (1 GPU)" "$MODEL" "$HTTP_PORT"
+print_curl_footer <<CURL
+curl -s http://localhost:${HTTP_PORT}/v1/videos \\
+  -H 'Content-Type: application/json' \\
+  -d '{
+    "model": "${MODEL}",
+    "prompt": "Dog running on a beach",
+    "size": "832x480",
+    "response_format": "url",
+    "nvext": {
+      "num_inference_steps": 20,
+      "num_frames": 30
+    }
+  }' | jq
+CURL
 python -m dynamo.frontend &

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,10 +51,10 @@ vllm = [
    "uvloop",
    "nixl[cu12]<=0.10.1",
    "vllm[flashinfer,runai,otel]==0.18.0",
-    # vllm-omni 0.16.0 is now on PyPI; install only future rc builds from source in container builds
+    # vllm-omni 0.18.0 is now on PyPI; install only future rc builds from source in container builds
    # (see container/deps/vllm/install_vllm.sh). pip install ai-dynamo[vllm] will
    # not include vllm-omni — install it separately from source if needed.
-    "vllm-omni==0.16.0",
+    "vllm-omni==0.18.0",
    "blake3>=1.0.0,<2.0.0",
 ]