Unverified commit 3718da8c, authored by Ben Hamm, committed by GitHub
Browse files

chore(recipes): update container image tags to 1.0.0 (#7375)


Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
parent cec19d4d
......@@ -21,7 +21,7 @@ spec:
mountPoint: /opt/model
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.8.0
image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.0.0
decode:
componentType: worker
subComponentType: decode
......@@ -38,7 +38,7 @@ spec:
size: 80Gi
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.8.0
image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.0.0
workingDir: /sgl-workspace/dynamo
command:
- python3
......@@ -85,7 +85,7 @@ spec:
size: 80Gi
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.8.0
image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.0.0
workingDir: /sgl-workspace/dynamo
command:
- python3
......
......@@ -21,7 +21,7 @@ spec:
mountPoint: /opt/model
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.8.0
image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.0.0
decode:
componentType: worker
subComponentType: decode
......@@ -36,7 +36,7 @@ spec:
size: 80Gi
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.8.0
image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.0.0
workingDir: /workspace
command:
- python3
......@@ -80,7 +80,7 @@ spec:
size: 80Gi
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.8.0
image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.0.0
workingDir: /workspace
command:
- python3
......
......@@ -126,7 +126,7 @@ spec:
tolerations: []
affinity: {}
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
args:
- |
python3 -m dynamo.frontend --http-port 8000
......@@ -158,7 +158,7 @@ spec:
tolerations: []
affinity: {}
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
workingDir: /workspace/components/backends/trtllm
# NOTE: If your PVCs (Persistent Volume Claims) are really slow,
# you might need to increase 'failureThreshold' below to allow more time for startup
......@@ -216,7 +216,7 @@ spec:
tolerations: []
affinity: {}
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
workingDir: /workspace/components/backends/trtllm
# NOTE: If your PVCs (Persistent Volume Claims) are really slow,
# you might need to increase 'failureThreshold' below to allow more time for startup
......
......@@ -26,7 +26,7 @@ spec:
periodSeconds: 10
timeoutSeconds: 1800
failureThreshold: 60
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.0
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
decode:
componentType: worker
subComponentType: decode
......@@ -52,7 +52,7 @@ spec:
periodSeconds: 10
timeoutSeconds: 10
failureThreshold: 600
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.0
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
workingDir: /workspace/dynamo
env:
- name: VLLM_USE_DEEP_GEMM
......@@ -124,7 +124,7 @@ spec:
periodSeconds: 10
timeoutSeconds: 10
failureThreshold: 600
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.0
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
workingDir: /workspace/dynamo
env:
- name: VLLM_USE_DEEP_GEMM
......
......@@ -45,7 +45,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
replicas: 1
TrtllmWorker:
componentType: worker
......@@ -79,7 +79,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
env:
- name: TRTLLM_ENABLE_PDL
value: "1"
......
......@@ -90,7 +90,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.7.0
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
replicas: 1
TrtllmPrefillWorker:
componentType: main
......@@ -122,7 +122,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.7.0
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
env:
- name: TRTLLM_ENABLE_PDL
value: "1"
......@@ -187,7 +187,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.7.0
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
env:
- name: TRTLLM_ENABLE_PDL
value: "1"
......
......@@ -51,7 +51,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
replicas: 1
TrtllmWorker:
componentType: worker
......@@ -84,7 +84,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
env:
- name: TRTLLM_ENABLE_PDL
value: "1"
......
......@@ -51,7 +51,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
replicas: 1
TrtllmWorker:
componentType: worker
......@@ -84,7 +84,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
env:
- name: TRTLLM_ENABLE_PDL
value: "1"
......
......@@ -55,7 +55,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
replicas: 1
TrtllmWorker:
componentType: worker
......@@ -95,7 +95,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
env:
- name: TRTLLM_ENABLE_PDL
value: "1"
......
......@@ -51,7 +51,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
replicas: 1
TrtllmWorker:
componentType: worker
......@@ -84,7 +84,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
env:
- name: TRTLLM_ENABLE_PDL
value: "1"
......
......@@ -16,7 +16,7 @@ For example:
```bash
./patch-container.sh nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
# produces image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag-patched
# produces image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0-patched
```
If `KimiK25ForConditionalGeneration` is already registered, the patch is skipped. The script is idempotent -- re-running it on an already-patched image is a no-op.
......
......@@ -17,7 +17,7 @@ spec:
mountPoint: /opt/models
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.0
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
workingDir: /workspace/examples/backends/vllm
envs:
- name: HF_HOME
......@@ -45,7 +45,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.0
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
workingDir: /workspace/examples/backends/vllm
replicas: 1
resources:
......
......@@ -16,7 +16,7 @@ spec:
replicas: 1
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/frontend:my-tag
image: nvcr.io/nvidia/ai-dynamo/frontend:1.0.0
eppConfig:
# This config uses the same disagg-profile-handler as disaggregated deployments.
# The handler's graceful degradation feature makes this possible:
......@@ -60,7 +60,7 @@ spec:
sharedMemory:
size: 20Gi
frontendSidecar:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
args:
- -m
- dynamo.frontend
......@@ -83,7 +83,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
workingDir: /workspace/examples/backends/vllm
replicas: 1
resources:
......
......@@ -17,7 +17,7 @@ spec:
mountPoint: /opt/models
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.0
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
workingDir: /workspace/examples/backends/vllm
envs:
- name: HF_HOME
......@@ -46,7 +46,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.0
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
workingDir: /workspace/examples/backends/vllm
replicas: 1
resources:
......@@ -77,7 +77,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.0
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
workingDir: /workspace/examples/backends/vllm
replicas: 1
resources:
......
......@@ -17,7 +17,7 @@ spec:
mountPoint: /opt/models
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.0
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
workingDir: /workspace/examples/backends/vllm
envs:
- name: HF_HOME
......@@ -58,7 +58,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.0
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
workingDir: /workspace/examples/backends/vllm
replicas: 2
resources:
......@@ -101,7 +101,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.0
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
workingDir: /workspace/examples/backends/vllm
replicas: 1
resources:
......
......@@ -16,7 +16,7 @@ spec:
replicas: 1
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/epp-image:my-tag
image: nvcr.io/nvidia/ai-dynamo/epp-image:1.0.0
eppConfig:
config:
plugins:
......@@ -68,7 +68,7 @@ spec:
sharedMemory:
size: 80Gi
frontendSidecar:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
args:
- -m
- dynamo.frontend
......@@ -101,7 +101,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
workingDir: /workspace/examples/backends/vllm
replicas: 2
resources:
......@@ -119,7 +119,7 @@ spec:
sharedMemory:
size: 80Gi
frontendSidecar:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
args:
- -m
- dynamo.frontend
......@@ -152,7 +152,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
workingDir: /workspace/examples/backends/vllm
replicas: 1
resources:
......
......@@ -53,7 +53,7 @@ spec:
- qwen3-235b-a22b-agg-frontend
topologyKey: kubernetes.io/hostname
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
args:
- python3 -m dynamo.frontend --router-mode kv --http-port 8000
command:
......@@ -94,7 +94,7 @@ spec:
--max-num-tokens 8192 \
--max-seq-len 8192 \
--extra-engine-args "${ENGINE_ARGS}"
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
workingDir: /workspace/components/backends/trtllm
volumeMounts:
- name: agg-config
......
......@@ -75,7 +75,7 @@ spec:
- qwen3-235b-a22b-disagg-frontend
topologyKey: kubernetes.io/hostname
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
args:
- python3 -m dynamo.frontend --router-mode kv --http-port 8000
command:
......@@ -109,7 +109,7 @@ spec:
value: /mnt/model-cache
- name: ENGINE_ARGS
value: /engine_configs/prefill.yaml
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
workingDir: /workspace/components/backends/trtllm
command:
- /bin/sh
......@@ -164,7 +164,7 @@ spec:
value: /mnt/model-cache
- name: ENGINE_ARGS
value: /engine_configs/decode.yaml
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
workingDir: /workspace/components/backends/trtllm
command:
- /bin/sh
......
......@@ -61,7 +61,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
replicas: 1
TrtllmWorker:
componentType: worker
......@@ -94,7 +94,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
env:
- name: TRTLLM_ENABLE_PDL
value: "1"
......
......@@ -218,7 +218,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
replicas: 1
TrtllmPrefillWorker:
componentType: worker
......@@ -253,7 +253,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
env:
- name: TRTLLM_ENABLE_PDL
value: "1"
......@@ -313,7 +313,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
env:
- name: TRTLLM_ENABLE_PDL
value: "1"
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment