"deploy/vscode:/vscode.git/clone" did not exist on "2a61e29e27178f87e5e7ff42fa9f07ef1e5b00e1"
Unverified commit d59b56be, authored by Ayush Agarwal, committed by GitHub
Browse files

chore(deps): bump vLLM to 0.19.1 (#8370)


Signed-off-by: ayushag <ayushag@nvidia.com>
parent 47cfbd46
......@@ -45,16 +45,19 @@ def load_qwen_grid_params(model_name: str) -> QwenGridParams | None:
merge_size: int = processor.merge_size
factor = patch_size * merge_size
# Qwen2/2.5-VL use min_pixels/max_pixels directly.
# Qwen3-VL sets them to None and uses size.shortest_edge/longest_edge.
# Qwen2/2.5-VL expose min_pixels/max_pixels attributes (transformers v4);
# transformers v5 and Qwen3-VL drop those attributes and rely on
# size.shortest_edge / size.longest_edge instead.
proc_min_pixels = getattr(processor, "min_pixels", None)
proc_max_pixels = getattr(processor, "max_pixels", None)
min_pixels: int = (
processor.min_pixels
if processor.min_pixels is not None
proc_min_pixels
if proc_min_pixels is not None
else processor.size.get("shortest_edge", factor)
)
max_pixels: int = (
processor.max_pixels
if processor.max_pixels is not None
proc_max_pixels
if proc_max_pixels is not None
else processor.size.get("longest_edge", factor * factor * 1280)
)
vision_hidden_dim: int = getattr(
......
......@@ -6,8 +6,11 @@
import argparse
import dataclasses
import logging
import os
from typing import Optional
import huggingface_hub
from vllm.transformers_utils.repo_utils import get_model_path
from vllm_omni.engine.arg_utils import OmniEngineArgs
try:
......@@ -423,6 +426,31 @@ def parse_omni_args() -> OmniConfig:
vllm_args = vllm_parser.parse_args(unknown)
config.model = vllm_args.model
# Resolve repo id to local snapshot path under HF_HUB_OFFLINE so
# vllm-omni diffusion workers don't hit transformers v5's offline
# LocalEntryNotFoundError (vLLM's EngineArgs does the same rewrite).
if (
huggingface_hub.constants.HF_HUB_OFFLINE
and config.model
and not os.path.exists(config.model)
):
model_id = config.model
config.model = get_model_path(
config.model, getattr(vllm_args, "revision", None)
)
if model_id != config.model:
# Preserve the original repo id as the user-facing model name
# so /v1/models still advertises "Wan-AI/..." not the snapshot path.
if getattr(config, "served_model_name", None) is None:
config.served_model_name = model_id
logger.info(
"HF_HUB_OFFLINE is True; replaced omni model_id [%s] "
"with model_path [%s] so vllm-omni diffusion workers "
"see a local snapshot.",
model_id,
config.model,
)
engine_args = OmniEngineArgs.from_cli_args(vllm_args)
if getattr(engine_args, "served_model_name", None) is not None:
......
......@@ -44,13 +44,13 @@ vllm:
runtime_image: nvcr.io/nvidia/cuda
base_image_tag: 25.06-cuda12.9-devel-ubuntu24.04
runtime_image_tag: 12.9.1-runtime-ubuntu24.04
vllm_ref: v0.19.0
vllm_ref: v0.19.1
cuda13.0:
base_image: nvcr.io/nvidia/cuda-dl-base
runtime_image: nvcr.io/nvidia/cuda
base_image_tag: 25.11-cuda13.0-devel-ubuntu24.04
runtime_image_tag: 13.0.2-runtime-ubuntu24.04
vllm_ref: v0.19.0
vllm_ref: v0.19.1
xpu:
base_image: intel/deep-learning-essentials
runtime_image: intel/deep-learning-essentials
......
......@@ -12,7 +12,7 @@
set -euo pipefail
VLLM_VER="0.19.0"
VLLM_VER="0.19.1"
VLLM_REF="v${VLLM_VER}"
DEVICE="cuda"
......
......@@ -375,8 +375,6 @@ sequenceDiagram
GLM-Image is a 2-stage text-to-image model with an AR stage (generates prior token IDs) and a DiT stage (diffusion denoising + VAE decode). The built-in vLLM-Omni stage config already assigns each stage to a separate GPU.
> **Experimental:** GLM-Image support is experimental; generation may fail or produce incorrect/garbled outputs for some prompts and sizes.
>
> **Known issue:** GLM-Image requires `transformers>=5.0` to recognize the `glm_image` architecture. Older versions fail at model config creation with `The checkpoint you are trying to load has model type 'glm_image' but Transformers does not recognize this architecture`.
```bash
bash examples/backends/vllm/launch/disagg_omni_glm_image.sh
......
......@@ -50,7 +50,7 @@ trtllm =[
vllm = [
"uvloop",
"nixl[cu12]<=0.10.1",
"vllm[flashinfer,runai,otel]==0.19.0",
"vllm[flashinfer,runai,otel]==0.19.1",
# vllm-omni is installed separately in container builds (see
# container/deps/vllm/install_vllm.sh). Do not add it to ai-dynamo[vllm]:
# pip/uv dependency resolution for omni can override the vLLM torch stack.
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment