Unverified commit ea02149e, authored by Dmitry Tokarev, committed by GitHub
Browse files

fix: Fixed vllm runtime image tags. Improved docker image tag selection logic (#6290)


Signed-off-by: Dmitry Tokarev <dtokarev@nvidia.com>
Co-authored-by: Dillon Cullinan <dcullinan@nvidia.com>
parent 56d91ee9
...@@ -235,7 +235,7 @@ python container/render.py --framework=vllm --target=local-dev --output-short-fi ...@@ -235,7 +235,7 @@ python container/render.py --framework=vllm --target=local-dev --output-short-fi
docker build --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) -f container/rendered.Dockerfile -t dynamo:latest-vllm-local-dev . docker build --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) -f container/rendered.Dockerfile -t dynamo:latest-vllm-local-dev .
# Build TensorRT-LLM development image called dynamo:latest-trtllm # Build TensorRT-LLM development image called dynamo:latest-trtllm
python container/render.py --framework=trtllm --target=runtime --output-short-filename python container/render.py --framework=trtllm --target=runtime --output-short-filename --cuda-version=13.1
docker build -t dynamo:latest-trtllm-runtime -f rendered.Dockerfile . docker build -t dynamo:latest-trtllm-runtime -f rendered.Dockerfile .
``` ```
......
...@@ -11,7 +11,10 @@ ...@@ -11,7 +11,10 @@
dynamo: dynamo:
base_image: nvcr.io/nvidia/cuda-dl-base base_image: nvcr.io/nvidia/cuda-dl-base
cuda12.9:
base_image_tag: 25.06-cuda12.9-devel-ubuntu24.04 base_image_tag: 25.06-cuda12.9-devel-ubuntu24.04
cuda13.0:
base_image_tag: 25.11-cuda13.0-devel-ubuntu24.04
epp_image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v0.5.1 epp_image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v0.5.1
frontend_image: nvcr.io/nvidia/base/ubuntu:noble-20250619 frontend_image: nvcr.io/nvidia/base/ubuntu:noble-20250619
python_version: "3.12" python_version: "3.12"
...@@ -33,11 +36,12 @@ dynamo: ...@@ -33,11 +36,12 @@ dynamo:
vllm: vllm:
base_image: nvcr.io/nvidia/cuda-dl-base base_image: nvcr.io/nvidia/cuda-dl-base
runtime_image: nvcr.io/nvidia/cuda runtime_image: nvcr.io/nvidia/cuda
runtime_image_tag: ${CUDA_VERSION}.0-runtime-ubuntu24.04
cuda12.9: cuda12.9:
base_image_tag: 25.06-cuda12.9-devel-ubuntu24.04 base_image_tag: 25.06-cuda12.9-devel-ubuntu24.04
runtime_image_tag: 12.9.1-runtime-ubuntu24.04
cuda13.0: cuda13.0:
base_image_tag: 25.11-cuda13.0-devel-ubuntu24.04 base_image_tag: 25.11-cuda13.0-devel-ubuntu24.04
runtime_image_tag: 13.0.2-runtime-ubuntu24.04
vllm_ref: v0.15.1 vllm_ref: v0.15.1
flashinf_ref: v0.6.1 flashinf_ref: v0.6.1
lmcache_ref: 0.3.13 lmcache_ref: 0.3.13
...@@ -63,8 +67,9 @@ sglang: ...@@ -63,8 +67,9 @@ sglang:
trtllm: trtllm:
base_image: nvcr.io/nvidia/pytorch base_image: nvcr.io/nvidia/pytorch
base_image_tag: 25.12-py3
runtime_image: nvcr.io/nvidia/cuda-dl-base runtime_image: nvcr.io/nvidia/cuda-dl-base
cuda13.1:
base_image_tag: 25.12-py3
runtime_image_tag: 25.12-cuda13.1-runtime-ubuntu24.04 runtime_image_tag: 25.12-cuda13.1-runtime-ubuntu24.04
enable_media_ffmpeg: "true" enable_media_ffmpeg: "true"
enable_gpu_memory_service: "false" enable_gpu_memory_service: "false"
......
...@@ -7,7 +7,7 @@ import re ...@@ -7,7 +7,7 @@ import re
from pathlib import Path from pathlib import Path
import yaml import yaml
from jinja2 import Environment, FileSystemLoader from jinja2 import Environment, FileSystemLoader, StrictUndefined
def parse_args(): def parse_args():
...@@ -18,7 +18,8 @@ def parse_args(): ...@@ -18,7 +18,8 @@ def parse_args():
"--framework", "--framework",
type=str, type=str,
default="vllm", default="vllm",
help="Dockerfile framework to use [dynamo, vllm, sglang, trtllm]", choices=["dynamo", "vllm", "sglang", "trtllm"],
help="Dockerfile framework to use",
) )
parser.add_argument( parser.add_argument(
"--target", "--target",
...@@ -30,13 +31,14 @@ def parse_args(): ...@@ -30,13 +31,14 @@ def parse_args():
"--platform", "--platform",
type=str, type=str,
default="amd64", default="amd64",
help="Dockerfile platform to use. [amdg64, arm64]", help="Dockerfile platform to use. [amd64, arm64]",
) )
parser.add_argument( parser.add_argument(
"--cuda-version", "--cuda-version",
type=str, type=str,
default="12.9", default="12.9",
help="CUDA version to use. [12.9, 13.0]", choices=["12.9", "13.0", "13.1"],
help="CUDA version to use. [12.9 or 13.0 for vllm and sglang, 13.1 for trtllm]",
) )
parser.add_argument("--make-efa", action="store_true", help="Enable AWS EFA") parser.add_argument("--make-efa", action="store_true", help="Enable AWS EFA")
parser.add_argument( parser.add_argument(
...@@ -55,28 +57,44 @@ def parse_args(): ...@@ -55,28 +57,44 @@ def parse_args():
def validate_args(args): def validate_args(args):
valid_inputs = { valid_inputs = {
"vllm": {"runtime", "dev", "local-dev", "framework", "wheel_builder", "base"}, "vllm": {
"trtllm": {"runtime", "dev", "local-dev", "framework", "wheel_builder", "base"}, "target": ["runtime", "dev", "local-dev", "framework", "wheel_builder", "base"],
"sglang": {"runtime", "dev", "local-dev", "wheel_builder", "base"}, "cuda_version": ["12.9", "13.0"],
"dynamo": {"runtime", "dev", "local-dev", "frontend", "wheel_builder", "base"}, },
"trtllm": {
"target": ["runtime", "dev", "local-dev", "framework", "wheel_builder", "base"],
"cuda_version": ["13.1"],
},
"sglang": {
"target": ["runtime", "dev", "local-dev", "wheel_builder", "base"],
"cuda_version": ["12.9", "13.0"],
},
"dynamo": {
"target": ["runtime", "dev", "local-dev", "frontend", "wheel_builder", "base"],
"cuda_version": ["12.9", "13.0"],
},
} }
if args.framework in valid_inputs: if args.framework in valid_inputs:
if args.target in valid_inputs[args.framework]: if args.target in valid_inputs[args.framework]["target"] and args.cuda_version in valid_inputs[args.framework]["cuda_version"]:
return return
else:
raise ValueError( raise ValueError(
f"Invalid input combination: [framework={args.framework},target={args.target}]" f"Invalid input combination: [framework={args.framework},target={args.target},cuda_version={args.cuda_version}]"
) )
raise ValueError( raise ValueError(
f"Invalid input combination: [framework={args.framework},target={args.target}]" f"Invalid input combination: [framework={args.framework},target={args.target},cuda_version={args.cuda_version}]"
) )
return return
def render(args, context, script_dir): def render(args, context, script_dir):
env = Environment( env = Environment(
loader=FileSystemLoader(script_dir), trim_blocks=False, lstrip_blocks=True loader=FileSystemLoader(script_dir),
trim_blocks=False,
lstrip_blocks=True,
undefined=StrictUndefined, # Raise an error if a variable in the template is not provided in the context
) )
template = env.get_template("Dockerfile.template") template = env.get_template("Dockerfile.template")
rendered = template.render( rendered = template.render(
......
...@@ -24,24 +24,13 @@ ARG PYTHON_VERSION={{ context.dynamo.python_version }} ...@@ -24,24 +24,13 @@ ARG PYTHON_VERSION={{ context.dynamo.python_version }}
ARG CUDA_VERSION={{ cuda_version }} ARG CUDA_VERSION={{ cuda_version }}
ARG CUDA_MAJOR=${CUDA_VERSION%%.*} ARG CUDA_MAJOR=${CUDA_VERSION%%.*}
{% if framework == "vllm" or framework == "sglang" -%} # Base and runtime images configuration
{% set cuda_context_key = "cuda" + cuda_version %} {% set cuda_context_key = "cuda" + cuda_version %}
# Base image configuration
ARG BASE_IMAGE={{ context[framework].base_image }} ARG BASE_IMAGE={{ context[framework].base_image }}
ARG BASE_IMAGE_TAG={{ context[framework][cuda_context_key].base_image_tag }} ARG BASE_IMAGE_TAG={{ context[framework][cuda_context_key].base_image_tag }}
{% elif framework != "vllm" and framework != "sglang" -%} {% if framework in ["sglang", "trtllm", "vllm"] -%}
ARG BASE_IMAGE={{ context[framework].base_image }}
ARG BASE_IMAGE_TAG={{ context[framework].base_image_tag }}
{%- endif %}
{% if framework == "sglang" -%}
{% set cuda_context_key = "cuda" + cuda_version %}
# Base image configuration
ARG RUNTIME_IMAGE={{ context[framework].runtime_image }} ARG RUNTIME_IMAGE={{ context[framework].runtime_image }}
ARG RUNTIME_IMAGE_TAG={{ context[framework][cuda_context_key].runtime_image_tag }} ARG RUNTIME_IMAGE_TAG={{ context[framework][cuda_context_key].runtime_image_tag }}
{% elif framework != "dynamo" -%}
ARG RUNTIME_IMAGE={{ context[framework].runtime_image }}
ARG RUNTIME_IMAGE_TAG={{ context[framework].runtime_image_tag }}
{%- endif %} {%- endif %}
# Build configuration # Build configuration
......
...@@ -82,11 +82,11 @@ docker compose -f deploy/docker-compose.yml up -d ...@@ -82,11 +82,11 @@ docker compose -f deploy/docker-compose.yml up -d
apt-get update && apt-get -y install git git-lfs apt-get update && apt-get -y install git git-lfs
# On an x86 machine: # On an x86 machine:
python container/render.py --framework=trtllm --target=runtime --output-short-filename python container/render.py --framework=trtllm --target=runtime --output-short-filename --cuda-version=13.1
docker build -t dynamo:trtllm-latest -f container/rendered.Dockerfile . docker build -t dynamo:trtllm-latest -f container/rendered.Dockerfile .
# On an ARM machine: # On an ARM machine:
python container/render.py --framework=trtllm --target=runtime --platform=arm64 --output-short-filename python container/render.py --framework=trtllm --target=runtime --platform=arm64 --output-short-filename --cuda-version=13.1
docker build -t dynamo:trtllm-latest -f container/rendered.Dockerfile . docker build -t dynamo:trtllm-latest -f container/rendered.Dockerfile .
``` ```
......
...@@ -112,7 +112,7 @@ Before using these templates, ensure you have: ...@@ -112,7 +112,7 @@ Before using these templates, ensure you have:
The deployment files currently require access to `my-registry/tensorrtllm-runtime`. If you don't have access, build and push your own image: The deployment files currently require access to `my-registry/tensorrtllm-runtime`. If you don't have access, build and push your own image:
```bash ```bash
python container/render.py --framework=trtllm --output-short-filename python container/render.py --framework=trtllm --output-short-filename --cuda-version=13.1
docker build -f container/rendered.Dockerfile . docker build -f container/rendered.Dockerfile .
# Tag and push to your container registry # Tag and push to your container registry
# Update the image references in the YAML files # Update the image references in the YAML files
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment