fix: Fixed vllm runtime image tags. Improved docker image tag selection logic (#6290)

Signed-off-by: Dmitry Tokarev <dtokarev@nvidia.com> Co-authored-by: Dillon Cullinan <dcullinan@nvidia.com>

fix: Fixed vllm runtime image tags. Improved docker image tag selection logic (#6290)
Signed-off-by: Dmitry Tokarev <dtokarev@nvidia.com> Co-authored-by: Dillon Cullinan <dcullinan@nvidia.com>
ea02149e · Dmitry Tokarev · GitHub · 56d91ee9 · ea02149e · ea02149e
Unverified Commit ea02149e authored Feb 17, 2026 by Dmitry Tokarev Committed by GitHub Feb 17, 2026
6 changed files
--- a/container/README.md
+++ b/container/README.md
@@ -235,7 +235,7 @@ python container/render.py --framework=vllm --target=local-dev --output-short-fi
 docker build --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) -f container/rendered.Dockerfile -t dynamo:latest-vllm-local-dev .
 # Build TensorRT-LLM runtime image called dynamo:latest-trtllm-runtime
-python container/render.py --framework=trtllm --target=runtime --output-short-filename
+python container/render.py --framework=trtllm --target=runtime --output-short-filename --cuda-version=13.1
 docker build -t dynamo:latest-trtllm-runtime -f rendered.Dockerfile .
 ```

--- a/container/context.yaml
+++ b/container/context.yaml
@@ -11,7 +11,10 @@
 dynamo:
  base_image: nvcr.io/nvidia/cuda-dl-base
+  cuda12.9:
    base_image_tag: 25.06-cuda12.9-devel-ubuntu24.04
+  cuda13.0:
+    base_image_tag: 25.11-cuda13.0-devel-ubuntu24.04
  epp_image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v0.5.1
  frontend_image: nvcr.io/nvidia/base/ubuntu:noble-20250619
  python_version: "3.12"
@@ -33,11 +36,12 @@ dynamo:
 vllm:
  base_image: nvcr.io/nvidia/cuda-dl-base
  runtime_image: nvcr.io/nvidia/cuda
-  runtime_image_tag: ${CUDA_VERSION}.0-runtime-ubuntu24.04
  cuda12.9:
    base_image_tag: 25.06-cuda12.9-devel-ubuntu24.04
+    runtime_image_tag: 12.9.1-runtime-ubuntu24.04
  cuda13.0:
    base_image_tag: 25.11-cuda13.0-devel-ubuntu24.04
+    runtime_image_tag: 13.0.2-runtime-ubuntu24.04
  vllm_ref: v0.15.1
  flashinf_ref: v0.6.1
  lmcache_ref: 0.3.13
@@ -63,8 +67,9 @@ sglang:
 trtllm:
  base_image: nvcr.io/nvidia/pytorch
-  base_image_tag: 25.12-py3
  runtime_image: nvcr.io/nvidia/cuda-dl-base
+  cuda13.1:
+    base_image_tag: 25.12-py3
    runtime_image_tag: 25.12-cuda13.1-runtime-ubuntu24.04
  enable_media_ffmpeg: "true"
  enable_gpu_memory_service: "false"

--- a/container/render.py
+++ b/container/render.py
@@ -7,7 +7,7 @@ import re
 from pathlib import Path
 import yaml
-from jinja2 import Environment, FileSystemLoader
+from jinja2 import Environment, FileSystemLoader, StrictUndefined
 def parse_args():
@@ -18,7 +18,8 @@ def parse_args():
        "--framework",
        type=str,
        default="vllm",
-        help="Dockerfile framework to use [dynamo, vllm, sglang, trtllm]",
+        choices=["dynamo", "vllm", "sglang", "trtllm"],
+        help="Dockerfile framework to use",
    )
    parser.add_argument(
        "--target",
@@ -30,13 +31,14 @@ def parse_args():
        "--platform",
        type=str,
        default="amd64",
-        help="Dockerfile platform to use. [amdg64, arm64]",
+        help="Dockerfile platform to use. [amd64, arm64]",
    )
    parser.add_argument(
        "--cuda-version",
        type=str,
        default="12.9",
-        help="CUDA version to use. [12.9, 13.0]",
+        choices=["12.9", "13.0", "13.1"],
+        help="CUDA version to use. [12.9 or 13.0 for vllm and sglang, 13.1 for trtllm]",
    )
    parser.add_argument("--make-efa", action="store_true", help="Enable AWS EFA")
    parser.add_argument(
@@ -55,28 +57,44 @@ def parse_args():
 def validate_args(args):
    valid_inputs = {
-        "vllm": {"runtime", "dev", "local-dev", "framework", "wheel_builder", "base"},
+        "vllm": {
-        "trtllm": {"runtime", "dev", "local-dev", "framework", "wheel_builder", "base"},
+            "target": ["runtime", "dev", "local-dev", "framework", "wheel_builder", "base"],
-        "sglang": {"runtime", "dev", "local-dev", "wheel_builder", "base"},
+            "cuda_version": ["12.9", "13.0"],
-        "dynamo": {"runtime", "dev", "local-dev", "frontend", "wheel_builder", "base"},
+        },
+        "trtllm": {
+            "target": ["runtime", "dev", "local-dev", "framework", "wheel_builder", "base"],
+            "cuda_version": ["13.1"],
+        },
+        "sglang": {
+            "target": ["runtime", "dev", "local-dev", "wheel_builder", "base"],
+            "cuda_version": ["12.9", "13.0"],
+        },
+        "dynamo": {
+            "target": ["runtime", "dev", "local-dev", "frontend", "wheel_builder", "base"],
+            "cuda_version": ["12.9", "13.0"],
+        },
    }
    if args.framework in valid_inputs:
-        if args.target in valid_inputs[args.framework]:
+        if args.target in valid_inputs[args.framework]["target"] and args.cuda_version in valid_inputs[args.framework]["cuda_version"]:
            return
+        else:
            raise ValueError(
-            f"Invalid input combination: [framework={args.framework},target={args.target}]"
+                f"Invalid input combination: [framework={args.framework},target={args.target},cuda_version={args.cuda_version}]"
            )
    raise ValueError(
-        f"Invalid input combination: [framework={args.framework},target={args.target}]"
+        f"Invalid input combination: [framework={args.framework},target={args.target},cuda_version={args.cuda_version}]"
    )
    return
 def render(args, context, script_dir):
    env = Environment(
-        loader=FileSystemLoader(script_dir), trim_blocks=False, lstrip_blocks=True
+        loader=FileSystemLoader(script_dir),
+        trim_blocks=False,
+        lstrip_blocks=True,
+        undefined=StrictUndefined,  # Raise an error if a variable in the template is not provided in the context
    )
    template = env.get_template("Dockerfile.template")
    rendered = template.render(

--- a/container/templates/args.Dockerfile
+++ b/container/templates/args.Dockerfile
@@ -24,24 +24,13 @@ ARG PYTHON_VERSION={{ context.dynamo.python_version }}
 ARG CUDA_VERSION={{ cuda_version }}
 ARG CUDA_MAJOR=${CUDA_VERSION%%.*}
-{% if framework == "vllm" or framework == "sglang" -%}
+# Base and runtime images configuration
 {% set cuda_context_key = "cuda" + cuda_version %}
-# Base image configuration
 ARG BASE_IMAGE={{ context[framework].base_image }}
 ARG BASE_IMAGE_TAG={{ context[framework][cuda_context_key].base_image_tag }}
-{% elif framework != "vllm" and framework != "sglang" -%}
+{% if framework in ["sglang", "trtllm", "vllm"] -%}
-ARG BASE_IMAGE={{ context[framework].base_image }}
-ARG BASE_IMAGE_TAG={{ context[framework].base_image_tag }}
-{%- endif %}
-{% if framework == "sglang" -%}
-{% set cuda_context_key = "cuda" + cuda_version %}
-# Base image configuration
 ARG RUNTIME_IMAGE={{ context[framework].runtime_image }}
 ARG RUNTIME_IMAGE_TAG={{ context[framework][cuda_context_key].runtime_image_tag }}
-{% elif framework != "dynamo" -%}
-ARG RUNTIME_IMAGE={{ context[framework].runtime_image }}
-ARG RUNTIME_IMAGE_TAG={{ context[framework].runtime_image_tag }}
 {%- endif %}
 # Build configuration

--- a/docs/pages/backends/trtllm/README.md
+++ b/docs/pages/backends/trtllm/README.md
@@ -82,11 +82,11 @@ docker compose -f deploy/docker-compose.yml up -d
 apt-get update && apt-get -y install git git-lfs
 # On an x86 machine:
-python container/render.py --framework=trtllm --target=runtime --output-short-filename
+python container/render.py --framework=trtllm --target=runtime --output-short-filename --cuda-version=13.1
 docker build -t dynamo:trtllm-latest -f container/rendered.Dockerfile .
 # On an ARM machine:
-python container/render.py --framework=trtllm --target=runtime --platform=arm64 --output-short-filename
+python container/render.py --framework=trtllm --target=runtime --platform=arm64 --output-short-filename --cuda-version=13.1
 docker build -t dynamo:trtllm-latest -f container/rendered.Dockerfile .
 ```

--- a/examples/backends/trtllm/deploy/README.md
+++ b/examples/backends/trtllm/deploy/README.md
@@ -112,7 +112,7 @@ Before using these templates, ensure you have:
 The deployment files currently require access to `my-registry/tensorrtllm-runtime`. If you don't have access, build and push your own image:
 ```bash
-python container/render.py --framework=trtllm --output-short-filename
+python container/render.py --framework=trtllm --output-short-filename --cuda-version=13.1
 docker build -f container/rendered.Dockerfile .
 # Tag and push to your container registry
 # Update the image references in the YAML files