fix: misnamed tensorrtllm-runtime image and incorrect tag (#4289)

Signed-off-by: Harrison King Saturley-Hall <hsaturleyhal@nvidia.com>
Signed-off-by: Harrison Saturley-Hall <harrison.saturley.hall@gmail.com>
Co-authored-by: hhzhang16 <54051230+hhzhang16@users.noreply.github.com>

fix: misnamed tensorrtllm-runtime image and incorrect tag (#4289)
Signed-off-by: Harrison King Saturley-Hall <hsaturleyhal@nvidia.com>
Signed-off-by: Harrison Saturley-Hall <harrison.saturley.hall@gmail.com>
Co-authored-by: hhzhang16 <54051230+hhzhang16@users.noreply.github.com>
a4eb4e8a · Harrison Saturley-Hall · GitHub · dce20d06 · a4eb4e8a · a4eb4e8a
Unverified Commit a4eb4e8a authored Nov 14, 2025 by Harrison Saturley-Hall Committed by GitHub Nov 14, 2025
13 changed files
--- a/benchmarks/profiler/deploy/profile_sla_aic_dgdr.yaml
+++ b/benchmarks/profiler/deploy/profile_sla_aic_dgdr.yaml
@@ -12,7 +12,7 @@ spec:

  # ProfilingConfig maps directly to the profile_sla.py config format
  profilingConfig:
-    profilerImage: "nvcr.io/nvidian/dynamo-dev/vllm-runtime:dep-554.0"
+    profilerImage: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag"
    config:
      # Sweep/profiling configuration
      sweep:
@@ -31,7 +31,7 @@ spec:

  # Deployment overrides for the auto-created DGD
  deploymentOverrides:
-    workersImage: "nvcr.io/nvidian/dynamo-dev/trtllm-runtime:dep-554.0"
+    workersImage: "nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag"

  # Automatically create DynamoGraphDeployment after profiling
  autoApply: true
--- a/benchmarks/profiler/deploy/profile_sla_dgdr.yaml
+++ b/benchmarks/profiler/deploy/profile_sla_dgdr.yaml
@@ -12,7 +12,7 @@ spec:

  # ProfilingConfig maps directly to the profile_sla.py config format
  profilingConfig:
-    profilerImage: "nvcr.io/nvidian/dynamo-dev/vllm-runtime:dep-554.0"
+    profilerImage: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag"
    config:
      # Sweep/profiling configuration
      sweep:
@@ -28,7 +28,7 @@ spec:

  # Deployment overrides for the auto-created DGD
  deploymentOverrides:
-    workersImage: "nvcr.io/nvidian/dynamo-dev/vllm-runtime:dep-554.0"
+    workersImage: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag"

  # Automatically create DynamoGraphDeployment after profiling
  autoApply: true
--- a/deploy/cloud/operator/config/samples/nvidia.com_v1alpha1_dynamographdeploymentrequest.yaml
+++ b/deploy/cloud/operator/config/samples/nvidia.com_v1alpha1_dynamographdeploymentrequest.yaml
@@ -25,7 +25,7 @@ spec:
  backend: trtllm

  # ProfilerImage is the container image to use for profiling jobs (required)
-  profilerImage: "nvcr.io/nvidia/ai-dynamo/trtllm-runtime:0.6.1"
+  profilerImage: "nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.6.1"

  # ProfilingConfig maps directly to the profile_sla.py config format
  # See benchmarks/profiler/utils/profiler_argparse.py for complete schema

--- a/docs/benchmarks/sla_driven_profiling.md
+++ b/docs/benchmarks/sla_driven_profiling.md
@@ -392,7 +392,7 @@ spec:
  backend: trtllm

  profilingConfig:
-    profilerImage: "nvcr.io/nvidia/ai-dynamo/trtllm-runtime:0.6.1"
+    profilerImage: "nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.6.1"
    config:
      sla:
        isl: 4000
@@ -409,7 +409,7 @@ spec:
        backend_version: "0.20.0"

  deploymentOverrides:
-    workersImage: "nvcr.io/nvidia/ai-dynamo/trtllm-runtime:0.6.1"
+    workersImage: "nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.6.1"

  autoApply: true
 ```
@@ -493,7 +493,7 @@ AssertionError: num_heads <N> should be divisible by tp_size <M> and the divisio

 ```yaml
 profilingConfig:
-  profilerImage: "nvcr.io/nvidia/ai-dynamo/trtllm-runtime:0.6.1"
+  profilerImage: "nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.6.1"
  config:
    hardware:
      max_num_gpus_per_engine: 4  # For Qwen3-0.6B (16 heads / 4 = max TP of 4)

--- a/examples/backends/trtllm/deploy/README.md
+++ b/examples/backends/trtllm/deploy/README.md
@@ -89,7 +89,7 @@ resources:
 ```yaml
 extraPodSpec:
  mainContainer:
-    image: my-registry/trtllm-runtime:my-tag
+    image: my-registry/tensorrtllm-runtime:my-tag
    workingDir: /workspace/examples/backends/trtllm
    args:
      - "python3"
@@ -109,7 +109,7 @@ Before using these templates, ensure you have:

 ### Container Images

-The deployment files currently require access to `my-registry/trtllm-runtime`. If you don't have access, build and push your own image:
+The deployment files currently require access to `my-registry/tensorrtllm-runtime`. If you don't have access, build and push your own image:

 ```bash
 ./container/build.sh --framework tensorrtllm
@@ -141,7 +141,7 @@ Edit the template to match your environment:

 ```yaml
 # Update image registry and tag
-image: my-registry/trtllm-runtime:my-tag
+image: my-registry/tensorrtllm-runtime:my-tag

 # Configure your model and deployment settings
 args:

--- a/examples/backends/trtllm/deploy/agg.yaml
+++ b/examples/backends/trtllm/deploy/agg.yaml
@@ -13,7 +13,7 @@ spec:
      replicas: 1
      extraPodSpec:
        mainContainer:
-          image: my-registry/trtllm-runtime:my-tag
+          image: my-registry/tensorrtllm-runtime:my-tag
    TRTLLMWorker:
      envFromSecret: hf-token-secret
      dynamoNamespace: trtllm-agg
@@ -24,7 +24,7 @@ spec:
          gpu: "1"
      extraPodSpec:
        mainContainer:
-          image: my-registry/trtllm-runtime:my-tag
+          image: my-registry/tensorrtllm-runtime:my-tag
          workingDir: /workspace/
          command:
          - python3

--- a/examples/backends/trtllm/deploy/agg_router.yaml
+++ b/examples/backends/trtllm/deploy/agg_router.yaml
@@ -13,7 +13,7 @@ spec:
      replicas: 1
      extraPodSpec:
        mainContainer:
-          image: my-registry/trtllm-runtime:my-tag
+          image: my-registry/tensorrtllm-runtime:my-tag
      envs:
        - name: DYN_ROUTER_MODE
          value: kv
@@ -27,7 +27,7 @@ spec:
          gpu: "1"
      extraPodSpec:
        mainContainer:
-          image: my-registry/trtllm-runtime:my-tag
+          image: my-registry/tensorrtllm-runtime:my-tag
          workingDir: /workspace/
          command:
          - python3

--- a/examples/backends/trtllm/deploy/disagg-multinode.yaml
+++ b/examples/backends/trtllm/deploy/disagg-multinode.yaml
@@ -95,7 +95,7 @@ spec:
      replicas: 1
      extraPodSpec:
        mainContainer:
-          image: my-registry/trtllm-runtime:my-tag
+          image: my-registry/tensorrtllm-runtime:my-tag
          workingDir: /workspace/examples/backends/trtllm
          command:
          - python3
@@ -127,7 +127,7 @@ spec:
            - name: nvidia-config
              mountPath: /workspace/
              readOnly: true
-          image: my-registry/trtllm-runtime:my-tag
+          image: my-registry/tensorrtllm-runtime:my-tag
          workingDir: /workspace/
          command:
          - python3
@@ -165,7 +165,7 @@ spec:
            - name: nvidia-config
              mountPath: /workspace/
              readOnly: true
-          image: my-registry/trtllm-runtime:my-tag
+          image: my-registry/tensorrtllm-runtime:my-tag
          workingDir: /workspace/
          command:
          - python3

--- a/examples/backends/trtllm/deploy/disagg.yaml
+++ b/examples/backends/trtllm/deploy/disagg.yaml
@@ -13,7 +13,7 @@ spec:
      replicas: 1
      extraPodSpec:
        mainContainer:
-          image: my-registry/trtllm-runtime:my-tag
+          image: my-registry/tensorrtllm-runtime:my-tag
    TRTLLMPrefillWorker:
      dynamoNamespace: trtllm-disagg
      envFromSecret: hf-token-secret
@@ -25,7 +25,7 @@ spec:
          gpu: "1"
      extraPodSpec:
        mainContainer:
-          image: my-registry/trtllm-runtime:my-tag
+          image: my-registry/tensorrtllm-runtime:my-tag
          workingDir: /workspace/
          command:
          - python3
@@ -51,7 +51,7 @@ spec:
          gpu: "1"
      extraPodSpec:
        mainContainer:
-          image: my-registry/trtllm-runtime:my-tag
+          image: my-registry/tensorrtllm-runtime:my-tag
          workingDir: /workspace/
          command:
          - python3

--- a/examples/backends/trtllm/deploy/disagg_planner.yaml
+++ b/examples/backends/trtllm/deploy/disagg_planner.yaml
@@ -13,7 +13,7 @@ spec:
      replicas: 1
      extraPodSpec:
        mainContainer:
-          image: my-registry/trtllm-runtime:my-tag
+          image: my-registry/tensorrtllm-runtime:my-tag
          workingDir: /workspace/examples/backends/trtllm
          command:
            - python3
@@ -38,7 +38,7 @@ spec:
      replicas: 1
      extraPodSpec:
        mainContainer:
-          image: my-registry/trtllm-runtime:my-tag
+          image: my-registry/tensorrtllm-runtime:my-tag
          workingDir: /workspace/components/src/dynamo/planner
          ports:
            - name: metrics
@@ -89,7 +89,7 @@ spec:
      extraPodSpec:
        terminationGracePeriodSeconds: 600
        mainContainer:
-          image: my-registry/trtllm-runtime:my-tag
+          image: my-registry/tensorrtllm-runtime:my-tag
          workingDir: /workspace/
          command:
            - python3
@@ -116,7 +116,7 @@ spec:
      extraPodSpec:
        terminationGracePeriodSeconds: 600
        mainContainer:
-          image: my-registry/trtllm-runtime:my-tag
+          image: my-registry/tensorrtllm-runtime:my-tag
          workingDir: /workspace/
          command:
            - python3

--- a/examples/backends/trtllm/deploy/disagg_router.yaml
+++ b/examples/backends/trtllm/deploy/disagg_router.yaml
@@ -13,7 +13,7 @@ spec:
      replicas: 1
      extraPodSpec:
        mainContainer:
-          image: my-registry/trtllm-runtime:my-tag
+          image: my-registry/tensorrtllm-runtime:my-tag
      envs:
        - name: DYN_ROUTER_MODE
          value: kv
@@ -27,7 +27,7 @@ spec:
          gpu: "1"
      extraPodSpec:
        mainContainer:
-          image: my-registry/trtllm-runtime:my-tag
+          image: my-registry/tensorrtllm-runtime:my-tag
          workingDir: /workspace/
          command:
          - python3
@@ -53,7 +53,7 @@ spec:
          gpu: "1"
      extraPodSpec:
        mainContainer:
-          image: my-registry/trtllm-runtime:my-tag
+          image: my-registry/tensorrtllm-runtime:my-tag
          workingDir: /workspace/
          command:
          - python3

--- a/recipes/README.md
+++ b/recipes/README.md
@@ -74,7 +74,7 @@ Ensure your Kubernetes cluster has:

 Ensure access to NVIDIA container registry for runtime images:
 - `nvcr.io/nvidia/ai-dynamo/vllm-runtime:x.y.z`
- `nvcr.io/nvidia/ai-dynamo/trtllm-runtime:x.y.z`
+- `nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:x.y.z`
 - `nvcr.io/nvidia/ai-dynamo/sglang-runtime:x.y.z`

 ### 5. HuggingFace Access and Kubernetes Secret Creation

--- a/recipes/gpt-oss-120b/trtllm/agg/deploy.yaml
+++ b/recipes/gpt-oss-120b/trtllm/agg/deploy.yaml
@@ -46,7 +46,7 @@ spec:
          command:
          - /bin/sh
          - -c
-          image: my-registry/trtllm-runtime:my-tag
+          image: my-registry/tensorrtllm-runtime:my-tag
      replicas: 1
    TrtllmWorker:
      componentType: main
@@ -81,7 +81,7 @@ spec:
          command:
          - /bin/sh
          - -c
-          image: my-registry/trtllm-runtime:my-tag
+          image: my-registry/tensorrtllm-runtime:my-tag
          env:
          - name: TRTLLM_ENABLE_PDL
            value: "1"