Unverified commit 19447d66, authored by Ayush Agarwal and committed by GitHub
Browse files

chore: bump to vllm_omni 0.18.0 (#7685)


Signed-off-by: ayushag <ayushag@nvidia.com>
parent db54ca2f
...@@ -7,7 +7,10 @@ import logging ...@@ -7,7 +7,10 @@ import logging
import os import os
import tempfile import tempfile
import time import time
from typing import Any, Optional from typing import TYPE_CHECKING, Any, Optional
if TYPE_CHECKING:
from dynamo.vllm.omni.args import OmniConfig
import uvloop import uvloop
from prometheus_client import REGISTRY, CollectorRegistry, multiprocess from prometheus_client import REGISTRY, CollectorRegistry, multiprocess
...@@ -183,7 +186,7 @@ async def worker() -> None: ...@@ -183,7 +186,7 @@ async def worker() -> None:
def setup_metrics_collection( def setup_metrics_collection(
config: Config, generate_endpoint: Endpoint, logger: logging.Logger config: "Config | OmniConfig", generate_endpoint: Endpoint, logger: logging.Logger
) -> None: ) -> None:
"""Set up metrics collection for vLLM and LMCache metrics. """Set up metrics collection for vLLM and LMCache metrics.
......
...@@ -7,7 +7,7 @@ import argparse ...@@ -7,7 +7,7 @@ import argparse
import logging import logging
from typing import Optional from typing import Optional
from vllm_omni.engine.arg_utils import AsyncOmniEngineArgs from vllm_omni.engine.arg_utils import OmniEngineArgs
try: try:
from vllm.utils import FlexibleArgumentParser from vllm.utils import FlexibleArgumentParser
...@@ -27,7 +27,7 @@ logger = logging.getLogger(__name__) ...@@ -27,7 +27,7 @@ logger = logging.getLogger(__name__)
class OmniArgGroup(ArgGroup): class OmniArgGroup(ArgGroup):
"""Diffusion pipeline kwargs passed through to AsyncOmni() constructor. """Diffusion pipeline kwargs passed through to AsyncOmni() constructor.
These are NOT part of AsyncOmniEngineArgs (which handles vLLM engine-level These are NOT part of OmniEngineArgs (which handles vLLM engine-level
args like model, tp, max_model_len). Instead they are direct constructor args like model, tp, max_model_len). Instead they are direct constructor
kwargs for AsyncOmni and need Dynamo-side env-var (DYN_OMNI_*) support, kwargs for AsyncOmni and need Dynamo-side env-var (DYN_OMNI_*) support,
so we define them here rather than relying on the upstream arg parser. so we define them here rather than relying on the upstream arg parser.
...@@ -197,7 +197,7 @@ class OmniConfig(DynamoRuntimeConfig): ...@@ -197,7 +197,7 @@ class OmniConfig(DynamoRuntimeConfig):
served_model_name: Optional[str] = None served_model_name: Optional[str] = None
# vLLM-Omni engine args # vLLM-Omni engine args
engine_args: AsyncOmniEngineArgs engine_args: OmniEngineArgs
# OmniArgGroup fields (populated by from_cli_args) # OmniArgGroup fields (populated by from_cli_args)
stage_configs_path: Optional[str] = None stage_configs_path: Optional[str] = None
...@@ -248,7 +248,7 @@ def parse_omni_args() -> OmniConfig: ...@@ -248,7 +248,7 @@ def parse_omni_args() -> OmniConfig:
"vLLM-Omni Engine Options. Please refer to vLLM-Omni documentation for more details." "vLLM-Omni Engine Options. Please refer to vLLM-Omni documentation for more details."
) )
vllm_parser = FlexibleArgumentParser(add_help=False) vllm_parser = FlexibleArgumentParser(add_help=False)
AsyncOmniEngineArgs.add_cli_args(vllm_parser, async_args_only=False) OmniEngineArgs.add_cli_args(vllm_parser)
for action in vllm_parser._actions: for action in vllm_parser._actions:
if not action.option_strings: if not action.option_strings:
...@@ -265,7 +265,7 @@ def parse_omni_args() -> OmniConfig: ...@@ -265,7 +265,7 @@ def parse_omni_args() -> OmniConfig:
vllm_args = vllm_parser.parse_args(unknown) vllm_args = vllm_parser.parse_args(unknown)
config.model = vllm_args.model config.model = vllm_args.model
engine_args = AsyncOmniEngineArgs.from_cli_args(vllm_args) engine_args = OmniEngineArgs.from_cli_args(vllm_args)
if getattr(engine_args, "served_model_name", None) is not None: if getattr(engine_args, "served_model_name", None) is not None:
served = engine_args.served_model_name served = engine_args.served_model_name
......
...@@ -22,7 +22,7 @@ pytestmark = [ ...@@ -22,7 +22,7 @@ pytestmark = [
def _make_omni_config(**overrides) -> OmniConfig: def _make_omni_config(**overrides) -> OmniConfig:
"""Build a minimal OmniConfig with valid defaults, applying overrides.""" """Build a minimal OmniConfig with valid defaults, applying overrides."""
defaults = { defaults: dict = {
# DynamoRuntimeConfig fields # DynamoRuntimeConfig fields
"namespace": "dynamo", "namespace": "dynamo",
"component": "backend", "component": "backend",
...@@ -113,3 +113,37 @@ def test_omni_config_valid_boundary_ratio(ratio): ...@@ -113,3 +113,37 @@ def test_omni_config_valid_boundary_ratio(ratio):
"""boundary_ratio within (0, 1] should pass.""" """boundary_ratio within (0, 1] should pass."""
config = _make_omni_config(boundary_ratio=ratio) config = _make_omni_config(boundary_ratio=ratio)
config.validate() # should not raise config.validate() # should not raise
# --- vllm_omni API compatibility guards ---
# These tests catch regressions when vllm_omni is upgraded.
def test_omni_engine_args_importable():
    """Guard: ``OmniEngineArgs`` is importable and exposes the CLI hooks.

    The import happens inside the test so that a missing or renamed class
    in ``vllm_omni.engine.arg_utils`` fails this test only, not collection
    of the whole module.
    """
    from vllm_omni.engine.arg_utils import OmniEngineArgs

    # Check the two classmethod names in the same order as before.
    for hook_name in ("add_cli_args", "from_cli_args"):
        assert hasattr(OmniEngineArgs, hook_name)
def test_omni_engine_args_add_cli_args_no_extra_params():
    """Guard: ``OmniEngineArgs.add_cli_args`` works with only a parser.

    The test passes if the call completes without raising; no return value
    is inspected.
    """
    from vllm_omni.engine.arg_utils import OmniEngineArgs

    # FlexibleArgumentParser is tried at ``vllm.utils`` first, with
    # ``vllm.utils.argparse_utils`` as the fallback location.
    try:
        from vllm.utils import FlexibleArgumentParser
    except ImportError:
        from vllm.utils.argparse_utils import FlexibleArgumentParser

    cli_parser = FlexibleArgumentParser(add_help=False)
    # Positional parser only: any additional required parameter would raise
    # TypeError here and fail the test.
    OmniEngineArgs.add_cli_args(cli_parser)
def test_omni_config_imports_cleanly():
    """Guard: ``dynamo.vllm.omni.args`` imports and exports its entry points.

    Importing the module is the main check; the assertions then confirm
    that ``OmniConfig`` is bound and ``parse_omni_args`` is callable.
    """
    from dynamo.vllm.omni.args import OmniConfig, parse_omni_args

    config_cls = OmniConfig
    parse_fn = parse_omni_args

    assert config_cls is not None
    assert callable(parse_fn)
...@@ -61,7 +61,7 @@ vllm: ...@@ -61,7 +61,7 @@ vllm:
vllm_ref: v0.16.0 vllm_ref: v0.16.0
flashinf_ref: v0.6.6 flashinf_ref: v0.6.6
lmcache_ref: 0.4.2 lmcache_ref: 0.4.2
vllm_omni_ref: "v0.16.0" vllm_omni_ref: "v0.18.0"
max_jobs: "10" max_jobs: "10"
enable_media_ffmpeg: "false" enable_media_ffmpeg: "false"
enable_gpu_memory_service: "true" enable_gpu_memory_service: "true"
......
...@@ -13,6 +13,8 @@ ...@@ -13,6 +13,8 @@
set -e set -e
trap 'echo Cleaning up...; kill 0' EXIT trap 'echo Cleaning up...; kill 0' EXIT
SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
source "$SCRIPT_DIR/../../../common/launch_utils.sh"
MODEL="Wan-AI/Wan2.2-TI2V-5B-Diffusers" MODEL="Wan-AI/Wan2.2-TI2V-5B-Diffusers"
...@@ -35,13 +37,26 @@ while [[ $# -gt 0 ]]; do ...@@ -35,13 +37,26 @@ while [[ $# -gt 0 ]]; do
esac esac
done done
echo "==========================================" HTTP_PORT="${DYN_HTTP_PORT:-8000}"
echo "Starting vLLM-Omni I2V Worker" print_launch_banner --no-curl "Launching vLLM-Omni Image-to-Video (1 GPU)" "$MODEL" "$HTTP_PORT"
echo "Model: $MODEL" print_curl_footer <<CURL
echo "==========================================" curl -s http://localhost:${HTTP_PORT}/v1/videos \\
-H 'Content-Type: application/json' \\
-d '{
"model": "${MODEL}",
"prompt": "A bear sleeping",
"input_reference": "/tmp/input.png",
"size": "832x480",
"response_format": "url",
"nvext": {
"num_inference_steps": 40,
"num_frames": 33,
"guidance_scale": 1.0,
"boundary_ratio": 0.875
}
}' | jq
CURL
echo "Starting frontend on port ${DYN_HTTP_PORT:-8000}..."
python -m dynamo.frontend & python -m dynamo.frontend &
FRONTEND_PID=$! FRONTEND_PID=$!
...@@ -53,4 +68,7 @@ DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT:-8081} \ ...@@ -53,4 +68,7 @@ DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT:-8081} \
--model "$MODEL" \ --model "$MODEL" \
--output-modalities video \ --output-modalities video \
--media-output-fs-url file:///tmp/dynamo_media \ --media-output-fs-url file:///tmp/dynamo_media \
"${EXTRA_ARGS[@]}" "${EXTRA_ARGS[@]}" &
# Exit on first worker failure; kill 0 in the EXIT trap tears down the rest
wait_any_exit
...@@ -26,7 +26,17 @@ while [[ $# -gt 0 ]]; do ...@@ -26,7 +26,17 @@ while [[ $# -gt 0 ]]; do
done done
HTTP_PORT="${DYN_HTTP_PORT:-8000}" HTTP_PORT="${DYN_HTTP_PORT:-8000}"
print_launch_banner "Launching vLLM-Omni Image Generation (1 GPU)" "$MODEL" "$HTTP_PORT" print_launch_banner --no-curl "Launching vLLM-Omni Image Generation (1 GPU)" "$MODEL" "$HTTP_PORT"
print_curl_footer <<CURL
curl -s -X POST http://localhost:${HTTP_PORT}/v1/images/generations \\
-H 'Content-Type: application/json' \\
-d '{
"model": "${MODEL}",
"prompt": "A red apple on a white table",
"size": "512x512",
"num_inference_steps": 20
}' | jq
CURL
python -m dynamo.frontend & python -m dynamo.frontend &
......
...@@ -26,7 +26,21 @@ while [[ $# -gt 0 ]]; do ...@@ -26,7 +26,21 @@ while [[ $# -gt 0 ]]; do
done done
HTTP_PORT="${DYN_HTTP_PORT:-8000}" HTTP_PORT="${DYN_HTTP_PORT:-8000}"
print_launch_banner "Launching vLLM-Omni Video Generation (1 GPU)" "$MODEL" "$HTTP_PORT" print_launch_banner --no-curl "Launching vLLM-Omni Video Generation (1 GPU)" "$MODEL" "$HTTP_PORT"
print_curl_footer <<CURL
curl -s http://localhost:${HTTP_PORT}/v1/videos \\
-H 'Content-Type: application/json' \\
-d '{
"model": "${MODEL}",
"prompt": "Dog running on a beach",
"size": "832x480",
"response_format": "url",
"nvext": {
"num_inference_steps": 20,
"num_frames": 30
}
}' | jq
CURL
python -m dynamo.frontend & python -m dynamo.frontend &
......
...@@ -51,10 +51,10 @@ vllm = [ ...@@ -51,10 +51,10 @@ vllm = [
"uvloop", "uvloop",
"nixl[cu12]<=0.10.1", "nixl[cu12]<=0.10.1",
"vllm[flashinfer,runai,otel]==0.18.0", "vllm[flashinfer,runai,otel]==0.18.0",
# vllm-omni 0.16.0 is now on PyPI; install only future rc builds from source in container builds # vllm-omni 0.18.0 is now on PyPI; install only future rc builds from source in container builds
# (see container/deps/vllm/install_vllm.sh). pip install ai-dynamo[vllm] will # (see container/deps/vllm/install_vllm.sh). pip install ai-dynamo[vllm] will
# not include vllm-omni — install it separately from source if needed. # not include vllm-omni — install it separately from source if needed.
"vllm-omni==0.16.0", "vllm-omni==0.18.0",
"blake3>=1.0.0,<2.0.0", "blake3>=1.0.0,<2.0.0",
] ]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment