fix: update the tags for consistency and remove 0.4.1 refs (#3058)

Signed-off-by: Harrison King Saturley-Hall <hsaturleyhal@nvidia.com>
Signed-off-by: Harrison Saturley-Hall <hsaturleyhal@nvidia.com>

fix: update the tags for consistency and remove 0.4.1 refs (#3058)
Signed-off-by: Harrison King Saturley-Hall <hsaturleyhal@nvidia.com>
Signed-off-by: Harrison Saturley-Hall <hsaturleyhal@nvidia.com>
9e8f67ed · Harrison Saturley-Hall · GitHub · 158435cd · 9e8f67ed · 9e8f67ed
Unverified commit 9e8f67ed, authored Sep 24, 2025 by Harrison Saturley-Hall; committed by GitHub on Sep 24, 2025.
20 changed files
--- a/docs/benchmarks/benchmarking.md
+++ b/docs/benchmarks/benchmarking.md
@@ -398,7 +398,7 @@ The benchmark job is configured directly in the YAML file.

 - **Model**: `Qwen/Qwen3-0.6B`
 - **Service**: `qwen-vllm-agg=vllm-agg-frontend:8000`
- **Docker Image**: `nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.5.0`
+- **Docker Image**: `nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag`

 ### Customizing the Job


--- a/docs/benchmarks/pre_deployment_profiling.md
+++ b/docs/benchmarks/pre_deployment_profiling.md
@@ -151,7 +151,7 @@ spec:

 1. **Set the container image:**
   ```bash
-   export DOCKER_IMAGE=nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1 # or any existing image tag (TODO: update to 0.5.0 upon release as profiling with 0.4.1 is broken)
+   export DOCKER_IMAGE=nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
   ```

 2. **Set the config path for the profiling job:**

--- a/examples/basics/kubernetes/Distributed_Inference/agg_router.yaml
+++ b/examples/basics/kubernetes/Distributed_Inference/agg_router.yaml
@@ -38,7 +38,7 @@ spec:
          memory: "2Gi"
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.5.0
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
          workingDir: /workspace/components/backends/vllm
          command:
            - /bin/sh
@@ -95,7 +95,7 @@ spec:
              port: 9090
            periodSeconds: 10
            failureThreshold: 60
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.5.0
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
          volumeMounts:
          - name: local-model-cache
            mountPath: /root/.cache

--- a/examples/deployments/ECS/task_definition_frontend.json
+++ b/examples/deployments/ECS/task_definition_frontend.json
@@ -3,7 +3,7 @@
    "containerDefinitions": [
        {
            "name": "dynamo-vllm-frontend",
-            "image": "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.0",
+            "image": "nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag",
            "repositoryCredentials": {
                "credentialsParameter": "arn:aws:secretsmanager:us-east-2:AWS_ID:secret:ngc_nvcr_access"
            },

--- a/examples/deployments/ECS/task_definition_prefillworker.json
+++ b/examples/deployments/ECS/task_definition_prefillworker.json
@@ -3,7 +3,7 @@
    "containerDefinitions": [
        {
            "name": "dynamo-prefill",
-            "image": "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.0",
+            "image": "nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag",
            "repositoryCredentials": {
                "credentialsParameter": "arn:aws:secretsmanager:us-east-2:AWS_ID:secret:ngc_access"
            },

--- a/recipes/deepseek-r1/sglang-wideep/tep16p-dep16d-disagg.yaml
+++ b/recipes/deepseek-r1/sglang-wideep/tep16p-dep16d-disagg.yaml
@@ -20,7 +20,7 @@ spec:
            periodSeconds: 10
            timeoutSeconds: 1800
            failureThreshold: 60
-          image: nvcr.io/nvidian/dynamo-dev/dynamo-sglang-wideep-runtime:hzhou-0917-01
+          image: my-registry/sglang-wideep-runtime:my-tag
    decode:
      dynamoNamespace: sgl-dsr1-16gpu
      componentType: worker
@@ -45,7 +45,7 @@ spec:
            periodSeconds: 10
            timeoutSeconds: 1800
            failureThreshold: 60
-          image: nvcr.io/nvidian/dynamo-dev/dynamo-sglang-wideep-runtime:hzhou-0917-01
+          image: my-registry/sglang-wideep-runtime:my-tag
          workingDir: /workspace/components/backends/sglang
          command:
            - /bin/sh
@@ -89,7 +89,7 @@ spec:
            periodSeconds: 10
            timeoutSeconds: 1800
            failureThreshold: 60
-          image: nvcr.io/nvidian/dynamo-dev/dynamo-sglang-wideep-runtime:hzhou-0917-01
+          image: my-registry/sglang-wideep-runtime:my-tag
          workingDir: /workspace/components/backends/sglang
          command:
            - /bin/sh

--- a/recipes/deepseek-r1/sglang-wideep/tep8p-dep8d-disagg.yaml
+++ b/recipes/deepseek-r1/sglang-wideep/tep8p-dep8d-disagg.yaml
@@ -20,7 +20,7 @@ spec:
            periodSeconds: 10
            timeoutSeconds: 1800
            failureThreshold: 60
-          image: nvcr.io/nvidian/dynamo-dev/dynamo-sglang-wideep-runtime:hzhou-0917-01
+          image: my-registry/sglang-wideep-runtime:my-tag
    decode:
      dynamoNamespace: sgl-dsr1-8gpu
      componentType: worker
@@ -43,7 +43,7 @@ spec:
            periodSeconds: 10
            timeoutSeconds: 1800
            failureThreshold: 60
-          image: nvcr.io/nvidian/dynamo-dev/dynamo-sglang-wideep-runtime:hzhou-0917-01
+          image: my-registry/sglang-wideep-runtime:my-tag
          workingDir: /workspace/components/backends/sglang
          command:
            - /bin/sh
@@ -84,7 +84,7 @@ spec:
            periodSeconds: 10
            timeoutSeconds: 1800
            failureThreshold: 60
-          image: nvcr.io/nvidian/dynamo-dev/dynamo-sglang-wideep-runtime:hzhou-0917-01
+          image: my-registry/sglang-wideep-runtime:my-tag
          workingDir: /workspace/components/backends/sglang
          command:
            - /bin/sh

--- a/recipes/gpt-oss-120b/trtllm/agg/bench.yaml
+++ b/recipes/gpt-oss-120b/trtllm/agg/bench.yaml
@@ -16,7 +16,7 @@ spec:
      restartPolicy: Never
      containers:
      - name: perf
-        image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:aiperf-0637181
+        image: my-registry/vllm-runtime:my-tag
        workingDir: /workspace/components/backends/vllm
        env:
          - name: TARGET_MODEL

--- a/recipes/gpt-oss-120b/trtllm/agg/deploy.yaml
+++ b/recipes/gpt-oss-120b/trtllm/agg/deploy.yaml
@@ -56,7 +56,7 @@ spec:
          command:
          - /bin/sh
          - -c
-          image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:gpt-oss-dynamo-nvl72-debug-trtllm-tot
+          image: my-registry/vllm-runtime:my-tag
          workingDir: /workspace/components/backends/trtllm
      replicas: 1
      resources:

--- a/recipes/llama-3-70b/vllm/agg/deploy.yaml
+++ b/recipes/llama-3-70b/vllm/agg/deploy.yaml
@@ -16,7 +16,7 @@ spec:
        mountPoint: /root/.cache/huggingface
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
          workingDir: /workspace/components/backends/vllm
      replicas: 1
    VllmPrefillWorker:
@@ -36,7 +36,7 @@ spec:
          command:
          - /bin/sh
          - -c
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
          workingDir: /workspace/components/backends/vllm
      replicas: 1
      resources:

--- a/recipes/llama-3-70b/vllm/agg/perf.yaml
+++ b/recipes/llama-3-70b/vllm/agg/perf.yaml
@@ -16,7 +16,7 @@ spec:
      restartPolicy: Never
      containers:
      - name: perf
-        image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+        image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
        workingDir: /workspace/components/backends/vllm
        command:
        - /bin/sh

--- a/recipes/llama-3-70b/vllm/disagg-multi-node/deploy.yaml
+++ b/recipes/llama-3-70b/vllm/disagg-multi-node/deploy.yaml
@@ -16,7 +16,7 @@ spec:
        mountPoint: /root/.cache/huggingface
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
          workingDir: /workspace/components/backends/vllm
      replicas: 1
    VllmPrefillWorker:
@@ -36,7 +36,7 @@ spec:
          command:
          - /bin/sh
          - -c
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
          workingDir: /workspace/components/backends/vllm
      replicas: 1
      resources:
@@ -61,7 +61,7 @@ spec:
          command:
          - /bin/sh
          - -c
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
          workingDir: /workspace/components/backends/vllm
      replicas: 1
      resources:

--- a/recipes/llama-3-70b/vllm/disagg-multi-node/perf.yaml
+++ b/recipes/llama-3-70b/vllm/disagg-multi-node/perf.yaml
@@ -16,7 +16,7 @@ spec:
      restartPolicy: Never
      containers:
      - name: perf
-        image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+        image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
        workingDir: /workspace/components/backends/vllm
        command:
        - /bin/sh

--- a/recipes/llama-3-70b/vllm/disagg-single-node/deploy.yaml
+++ b/recipes/llama-3-70b/vllm/disagg-single-node/deploy.yaml
@@ -16,7 +16,7 @@ spec:
        mountPoint: /root/.cache/huggingface
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
          workingDir: /workspace/components/backends/vllm
      replicas: 1
    VllmPrefillWorker:
@@ -46,7 +46,7 @@ spec:
          command:
          - /bin/sh
          - -c
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
          workingDir: /workspace/components/backends/vllm
      replicas: 2
      resources:
@@ -81,7 +81,7 @@ spec:
          command:
          - /bin/sh
          - -c
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
          workingDir: /workspace/components/backends/vllm
      replicas: 1
      resources:

--- a/recipes/llama-3-70b/vllm/disagg-single-node/perf.yaml
+++ b/recipes/llama-3-70b/vllm/disagg-single-node/perf.yaml
@@ -16,7 +16,7 @@ spec:
      restartPolicy: Never
      containers:
      - name: perf
-        image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+        image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
        workingDir: /workspace/components/backends/vllm
        command:
        - /bin/sh

--- a/tests/planner/perf_test_configs/agg_8b.yaml
+++ b/tests/planner/perf_test_configs/agg_8b.yaml
@@ -38,7 +38,7 @@ spec:
          memory: "100Gi"
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0825-01
+          image: my-registry/vllm-runtime:my-tag
          workingDir: /workspace/components/backends/vllm
          command:
            - /bin/sh
@@ -88,7 +88,7 @@ spec:
              port: 9090
            periodSeconds: 10
            failureThreshold: 60
-          image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0825-01
+          image: my-registry/vllm-runtime:my-tag
          workingDir: /workspace/components/backends/vllm
          command:
            - /bin/sh

--- a/tests/planner/perf_test_configs/disagg_8b_2p2d.yaml
+++ b/tests/planner/perf_test_configs/disagg_8b_2p2d.yaml
@@ -38,7 +38,7 @@ spec:
          memory: "100Gi"
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01
+          image: my-registry/vllm-runtime:my-tag
          workingDir: /workspace/components/backends/vllm
          command:
            - /bin/sh
@@ -88,7 +88,7 @@ spec:
              port: 9090
            periodSeconds: 10
            failureThreshold: 60
-          image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01
+          image: my-registry/vllm-runtime:my-tag
          workingDir: /workspace/components/backends/vllm
          command:
            - /bin/sh
@@ -138,7 +138,7 @@ spec:
              port: 9090
            periodSeconds: 10
            failureThreshold: 60
-          image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01
+          image: my-registry/vllm-runtime:my-tag
          workingDir: /workspace/components/backends/vllm
          command:
            - /bin/sh

--- a/tests/planner/perf_test_configs/disagg_8b_3p1d.yaml
+++ b/tests/planner/perf_test_configs/disagg_8b_3p1d.yaml
@@ -38,7 +38,7 @@ spec:
          memory: "100Gi"
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01
+          image: my-registry/vllm-runtime:my-tag
          workingDir: /workspace/components/backends/vllm
          command:
            - /bin/sh
@@ -88,7 +88,7 @@ spec:
              port: 9090
            periodSeconds: 10
            failureThreshold: 60
-          image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01
+          image: my-registry/vllm-runtime:my-tag
          workingDir: /workspace/components/backends/vllm
          command:
            - /bin/sh
@@ -138,7 +138,7 @@ spec:
              port: 9090
            periodSeconds: 10
            failureThreshold: 60
-          image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01
+          image: my-registry/vllm-runtime:my-tag
          workingDir: /workspace/components/backends/vllm
          command:
            - /bin/sh

--- a/tests/planner/perf_test_configs/disagg_8b_planner.yaml
+++ b/tests/planner/perf_test_configs/disagg_8b_planner.yaml
@@ -43,7 +43,7 @@ spec:
          memory: "100Gi"
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01
+          image: my-registry/vllm-runtime:my-tag
          workingDir: /workspace/components/backends/vllm
          command:
            - /bin/sh
@@ -79,7 +79,7 @@ spec:
        failureThreshold: 10
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01
+          image: my-registry/vllm-runtime:my-tag
          workingDir: /workspace/components/planner/src/dynamo/planner
          ports:
            - name: metrics
@@ -128,7 +128,7 @@ spec:
        failureThreshold: 10
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01
+          image: my-registry/vllm-runtime:my-tag
          workingDir: /workspace/components/backends/vllm
          command:
            - /bin/sh
@@ -179,7 +179,7 @@ spec:
              port: 9090
            periodSeconds: 10
            failureThreshold: 60
-          image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01
+          image: my-registry/vllm-runtime:my-tag
          workingDir: /workspace/components/backends/vllm
          command:
            - python3
@@ -235,7 +235,7 @@ spec:
              port: 9090
            periodSeconds: 10
            failureThreshold: 60
-          image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01
+          image: my-registry/vllm-runtime:my-tag
          workingDir: /workspace/components/backends/vllm
          command:
            - python3

--- a/tests/planner/perf_test_configs/disagg_8b_tp2.yaml
+++ b/tests/planner/perf_test_configs/disagg_8b_tp2.yaml
@@ -38,7 +38,7 @@ spec:
          memory: "100Gi"
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01
+          image: my-registry/vllm-runtime:my-tag
          workingDir: /workspace/components/backends/vllm
          command:
            - /bin/sh
@@ -88,7 +88,7 @@ spec:
              port: 9090
            periodSeconds: 10
            failureThreshold: 60
-          image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01
+          image: my-registry/vllm-runtime:my-tag
          workingDir: /workspace/components/backends/vllm
          command:
            - /bin/sh
@@ -138,7 +138,7 @@ spec:
              port: 9090
            periodSeconds: 10
            failureThreshold: 60
-          image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01
+          image: my-registry/vllm-runtime:my-tag
          workingDir: /workspace/components/backends/vllm
          command:
            - /bin/sh