fix: update the tags for consistency and remove 0.4.1 refs (#3058)

Signed-off-by: Harrison King Saturley-Hall <hsaturleyhal@nvidia.com>
Signed-off-by: Harrison Saturley-Hall <hsaturleyhal@nvidia.com>

fix: update the tags for consistency and remove 0.4.1 refs (#3058)
Signed-off-by: Harrison King Saturley-Hall <hsaturleyhal@nvidia.com>
Signed-off-by: Harrison Saturley-Hall <hsaturleyhal@nvidia.com>
9e8f67ed · Harrison Saturley-Hall · GitHub · 158435cd · 9e8f67ed · 9e8f67ed
Unverified Commit 9e8f67ed authored Sep 24, 2025 by Harrison Saturley-Hall Committed by GitHub Sep 24, 2025
20 changed files
--- a/benchmarks/incluster/benchmark_job.yaml
+++ b/benchmarks/incluster/benchmark_job.yaml
@@ -17,7 +17,7 @@ spec:
        fsGroup: 1000
      containers:
      - name: benchmark-runner
-        image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.5.0
+        image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
        securityContext:
          allowPrivilegeEscalation: false
          capabilities:

--- a/benchmarks/nixl/nixl-benchmark-deployment.yaml
+++ b/benchmarks/nixl/nixl-benchmark-deployment.yaml
@@ -18,7 +18,7 @@ spec:
        - name: nvcr-imagepullsecret
      containers:
      - name: nixl-benchmark
-        image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:nixlbench-e42c07a8
+        image: my-registry/vllm-runtime:nixlbench-e42c07a8
        command: ["sh", "-c"]
        args:
          - "nixlbench -etcd_endpoints http://dynamo-platform-etcd:2379 --target_seg_type VRAM --initiator_seg_type VRAM && sleep infinity"

--- a/components/backends/sglang/README.md
+++ b/components/backends/sglang/README.md
@@ -130,7 +130,7 @@ uv pip install --prerelease=allow sglang[all]==0.4.9.post6
 <summary>Instructions</summary>

 ```bash
-docker pull nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.3.2
+docker pull nvcr.io/nvidia/ai-dynamo/sglang-runtime:my-tag
 ```

 </details>

--- a/components/backends/sglang/deploy/README.md
+++ b/components/backends/sglang/deploy/README.md
@@ -92,7 +92,7 @@ Edit the template to match your environment:

 ```yaml
 # Update image registry and tag
-image: your-registry/sglang-runtime:your-tag
+image: my-registry/sglang-runtime:my-tag

 # Configure your model
 args:

--- a/components/backends/sglang/deploy/disagg_planner.yaml
+++ b/components/backends/sglang/deploy/disagg_planner.yaml
@@ -18,7 +18,7 @@ spec:
      replicas: 1
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0811-1
+          image: my-registry/sglang-runtime:my-tag
    Planner:
      dynamoNamespace: dynamo
      envFromSecret: hf-token-secret
@@ -49,7 +49,7 @@ spec:
        mountPoint: /data
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0811-1
+          image: my-registry/sglang-runtime:my-tag
          workingDir: /workspace/components/planner/src/dynamo/planner
          command:
            - /bin/sh
@@ -89,7 +89,7 @@ spec:
        failureThreshold: 10
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0811-1
+          image: my-registry/sglang-runtime:my-tag
          workingDir: /workspace/components/backends/sglang
          command:
            - /bin/sh
@@ -106,7 +106,7 @@ spec:
          gpu: "1"
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0811-1
+          image: my-registry/sglang-runtime:my-tag
          workingDir: /workspace/components/backends/sglang
          command:
            - python3
@@ -137,7 +137,7 @@ spec:
          gpu: "1"
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0811-1
+          image: my-registry/sglang-runtime:my-tag
          workingDir: /workspace/components/backends/sglang
          command:
            - python3

--- a/components/backends/trtllm/deploy/README.md
+++ b/components/backends/trtllm/deploy/README.md
@@ -89,7 +89,7 @@ resources:
 ```yaml
 extraPodSpec:
  mainContainer:
-    image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
+    image: my-registry/trtllm-runtime:my-tag
    workingDir: /workspace/components/backends/trtllm
    args:
      - "python3"
@@ -109,7 +109,7 @@ Before using these templates, ensure you have:

 ### Container Images

-The deployment files currently require access to `nvcr.io/nvidian/nim-llm-dev/trtllm-runtime`. If you don't have access, build and push your own image:
+The deployment files reference the placeholder image `my-registry/trtllm-runtime`. Replace it with an image you have access to, or build and push your own image:

 ```bash
 ./container/build.sh --framework tensorrtllm
@@ -141,7 +141,7 @@ Edit the template to match your environment:

 ```yaml
 # Update image registry and tag
-image: your-registry/trtllm-runtime:your-tag
+image: my-registry/trtllm-runtime:my-tag

 # Configure your model and deployment settings
 args:

--- a/components/backends/trtllm/deploy/agg-with-config.yaml
+++ b/components/backends/trtllm/deploy/agg-with-config.yaml
@@ -34,7 +34,7 @@ spec:
      replicas: 1
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag
    TRTLLMWorker:
      envFromSecret: hf-token-secret
      dynamoNamespace: trtllm-agg
@@ -50,7 +50,7 @@ spec:
          configMap:
            name: nvidia-config
        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag
          workingDir: /workspace/components/backends/trtllm
          # mount the configmap as a volume
          volumeMounts:

--- a/components/backends/trtllm/deploy/agg.yaml
+++ b/components/backends/trtllm/deploy/agg.yaml
@@ -13,7 +13,7 @@ spec:
      replicas: 1
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
+          image: my-registry/trtllm-runtime:my-tag
    TRTLLMWorker:
      envFromSecret: hf-token-secret
      dynamoNamespace: trtllm-agg
@@ -24,7 +24,7 @@ spec:
          gpu: "1"
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
+          image: my-registry/trtllm-runtime:my-tag
          workingDir: /workspace/components/backends/trtllm
          command:
            - /bin/sh

--- a/components/backends/trtllm/deploy/agg_router.yaml
+++ b/components/backends/trtllm/deploy/agg_router.yaml
@@ -13,7 +13,7 @@ spec:
      replicas: 1
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
+          image: my-registry/trtllm-runtime:my-tag
      envs:
        - name: DYN_ROUTER_MODE
          value: kv
@@ -27,7 +27,7 @@ spec:
          gpu: "1"
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
+          image: my-registry/trtllm-runtime:my-tag
          workingDir: /workspace/components/backends/trtllm
          command:
            - /bin/sh

--- a/components/backends/trtllm/deploy/disagg.yaml
+++ b/components/backends/trtllm/deploy/disagg.yaml
@@ -13,7 +13,7 @@ spec:
      replicas: 1
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
+          image: my-registry/trtllm-runtime:my-tag
    TRTLLMPrefillWorker:
      dynamoNamespace: trtllm-disagg
      envFromSecret: hf-token-secret
@@ -24,7 +24,7 @@ spec:
          gpu: "1"
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
+          image: my-registry/trtllm-runtime:my-tag
          workingDir: /workspace/components/backends/trtllm
          command:
            - /bin/sh
@@ -41,7 +41,7 @@ spec:
          gpu: "1"
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
+          image: my-registry/trtllm-runtime:my-tag
          workingDir: /workspace/components/backends/trtllm
          command:
            - /bin/sh

--- a/components/backends/trtllm/deploy/disagg_planner.yaml
+++ b/components/backends/trtllm/deploy/disagg_planner.yaml
@@ -18,7 +18,7 @@ spec:
      replicas: 1
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidian/dynamo-dev/dynamo-trtllm-runtime:hzhou-0909-03
+          image: my-registry/trtllm-runtime:my-tag
          workingDir: /workspace/components/backends/trtllm
          command:
            - python3
@@ -69,7 +69,7 @@ spec:
        mountPoint: /data
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidian/dynamo-dev/dynamo-trtllm-runtime:hzhou-0909-03
+          image: my-registry/trtllm-runtime:my-tag
          workingDir: /workspace/components/planner/src/dynamo/planner
          ports:
            - name: metrics
@@ -114,7 +114,7 @@ spec:
        failureThreshold: 10
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidian/dynamo-dev/dynamo-trtllm-runtime:hzhou-0909-03
+          image: my-registry/trtllm-runtime:my-tag
          workingDir: /workspace/components/backends/trtllm
          command:
            - python3
@@ -152,7 +152,7 @@ spec:
              port: 9090
            periodSeconds: 10
            failureThreshold: 60
-          image: nvcr.io/nvidian/dynamo-dev/dynamo-trtllm-runtime:hzhou-0909-03
+          image: my-registry/trtllm-runtime:my-tag
          workingDir: /workspace/components/backends/trtllm
          command:
            - python3
@@ -186,7 +186,7 @@ spec:
              port: 9090
            periodSeconds: 10
            failureThreshold: 60
-          image: nvcr.io/nvidian/dynamo-dev/dynamo-trtllm-runtime:hzhou-0909-03
+          image: my-registry/trtllm-runtime:my-tag
          workingDir: /workspace/components/backends/trtllm
          command:
            - python3

--- a/components/backends/trtllm/deploy/disagg_router.yaml
+++ b/components/backends/trtllm/deploy/disagg_router.yaml
@@ -13,7 +13,7 @@ spec:
      replicas: 1
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
+          image: my-registry/trtllm-runtime:my-tag
      envs:
        - name: DYN_ROUTER_MODE
          value: kv
@@ -27,7 +27,7 @@ spec:
          gpu: "1"
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
+          image: my-registry/trtllm-runtime:my-tag
          workingDir: /workspace/components/backends/trtllm
          command:
            - /bin/sh
@@ -44,7 +44,7 @@ spec:
          gpu: "1"
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
+          image: my-registry/trtllm-runtime:my-tag
          workingDir: /workspace/components/backends/trtllm
          command:
            - /bin/sh

--- a/components/backends/vllm/deploy/README.md
+++ b/components/backends/vllm/deploy/README.md
@@ -116,7 +116,7 @@ Edit the template to match your environment:

 ```yaml
 # Update image registry and tag
-image: your-registry/vllm-runtime:your-tag
+image: my-registry/vllm-runtime:my-tag

 # Configure your model
 args:

--- a/components/backends/vllm/deploy/agg.yaml
+++ b/components/backends/vllm/deploy/agg.yaml
@@ -13,7 +13,7 @@ spec:
      replicas: 1
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
    VllmDecodeWorker:
      envFromSecret: hf-token-secret
      dynamoNamespace: vllm-agg
@@ -24,7 +24,7 @@ spec:
          gpu: "1"
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
          workingDir: /workspace/components/backends/vllm
          command:
            - /bin/sh

--- a/components/backends/vllm/deploy/agg_router.yaml
+++ b/components/backends/vllm/deploy/agg_router.yaml
@@ -13,7 +13,7 @@ spec:
      replicas: 1
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
      envs:
        - name: DYN_ROUTER_MODE
          value: kv
@@ -27,7 +27,7 @@ spec:
          gpu: "1"
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
          workingDir: /workspace/components/backends/vllm
          command:
            - /bin/sh

--- a/components/backends/vllm/deploy/disagg.yaml
+++ b/components/backends/vllm/deploy/disagg.yaml
@@ -13,7 +13,7 @@ spec:
      replicas: 1
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
    VllmDecodeWorker:
      dynamoNamespace: vllm-disagg
      envFromSecret: hf-token-secret
@@ -24,7 +24,7 @@ spec:
          gpu: "1"
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
          workingDir: /workspace/components/backends/vllm
          command:
            - /bin/sh
@@ -41,7 +41,7 @@ spec:
          gpu: "1"
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
          workingDir: /workspace/components/backends/vllm
          command:
            - /bin/sh

--- a/components/backends/vllm/deploy/disagg_planner.yaml
+++ b/components/backends/vllm/deploy/disagg_planner.yaml
@@ -20,7 +20,7 @@ spec:
      replicas: 1
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
    Planner:
      dynamoNamespace: vllm-disagg-planner
      envFromSecret: hf-token-secret
@@ -51,7 +51,7 @@ spec:
        mountPoint: /data
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
          workingDir: /workspace/components/planner/src/dynamo/planner
          command:
            - /bin/sh
@@ -91,7 +91,7 @@ spec:
        failureThreshold: 10
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
          workingDir: /workspace/components/backends/vllm
          command:
            - /bin/sh
@@ -114,7 +114,7 @@ spec:
              port: 9090
            periodSeconds: 10
            failureThreshold: 60
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
          workingDir: /workspace/components/backends/vllm
          command:
            - python3
@@ -139,7 +139,7 @@ spec:
              port: 9090
            periodSeconds: 10
            failureThreshold: 60
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
          workingDir: /workspace/components/backends/vllm
          command:
            - python3

--- a/components/backends/vllm/deploy/disagg_router.yaml
+++ b/components/backends/vllm/deploy/disagg_router.yaml
@@ -13,7 +13,7 @@ spec:
      replicas: 1
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
      envs:
        - name: DYN_ROUTER_MODE
          value: kv
@@ -27,7 +27,7 @@ spec:
          gpu: "1"
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
          workingDir: /workspace/components/backends/vllm
          command:
            - /bin/sh
@@ -44,7 +44,7 @@ spec:
          gpu: "1"
      extraPodSpec:
        mainContainer:
-          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
          workingDir: /workspace/components/backends/vllm
          command:
            - /bin/sh

--- a/deploy/inference-gateway/helm/dynamo-gaie/values.yaml
+++ b/deploy/inference-gateway/helm/dynamo-gaie/values.yaml
@@ -73,7 +73,7 @@ eppAware:
    # Container name for the sidecar
    name: frontend-router
    # Sidecar image
-    image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
+    image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
    # Image pull policy for the sidecar
    imagePullPolicy: IfNotPresent
    # Command and args for running the frontend in router mode.

--- a/docs/_includes/install.rst
+++ b/docs/_includes/install.rst
@@ -10,7 +10,7 @@ Install a pre-built wheel from PyPI.
   source venv/bin/activate

   # Install Dynamo from PyPI (choose one backend extra)
-   uv pip install "ai-dynamo[sglang]==0.4.1"  # or [vllm], [trtllm]
+   uv pip install "ai-dynamo[sglang]==my-tag"  # or [vllm], [trtllm]


 Pip from source
@@ -41,4 +41,4 @@ Pull and run prebuilt images from NVIDIA NGC (`nvcr.io`).
   docker run --rm -it \
     --gpus all \
     --network host \
-     nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.4.1  # or vllm, tensorrtllm
+     nvcr.io/nvidia/ai-dynamo/sglang-runtime:my-tag  # or vllm, tensorrtllm