Unverified commit 183100b1, authored by Ben Hamm and committed by GitHub
Browse files

fix(recipes): revert untested/blocked recipes to pre-1.0.0 image tags (#7411)


Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
parent da810a26
......@@ -21,7 +21,7 @@ spec:
mountPoint: /opt/model
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.0.0
image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.8.0
decode:
componentType: worker
subComponentType: decode
......@@ -38,7 +38,7 @@ spec:
size: 80Gi
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.0.0
image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.8.0
workingDir: /sgl-workspace/dynamo
command:
- python3
......@@ -85,7 +85,7 @@ spec:
size: 80Gi
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.0.0
image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.8.0
workingDir: /sgl-workspace/dynamo
command:
- python3
......
......@@ -126,7 +126,7 @@ spec:
tolerations: []
affinity: {}
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0
args:
- |
python3 -m dynamo.frontend --http-port 8000
......@@ -158,7 +158,7 @@ spec:
tolerations: []
affinity: {}
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0
workingDir: /workspace/components/backends/trtllm
# NOTE: If your PVCs (Persistent Volume Claims) are really slow,
# you might need to increase 'failureThreshold' below to allow more time for startup
......@@ -216,7 +216,7 @@ spec:
tolerations: []
affinity: {}
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0
workingDir: /workspace/components/backends/trtllm
# NOTE: If your PVCs (Persistent Volume Claims) are really slow,
# you might need to increase 'failureThreshold' below to allow more time for startup
......
......@@ -26,7 +26,7 @@ spec:
periodSeconds: 10
timeoutSeconds: 1800
failureThreshold: 60
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.0
decode:
componentType: worker
subComponentType: decode
......@@ -52,7 +52,7 @@ spec:
periodSeconds: 10
timeoutSeconds: 10
failureThreshold: 600
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.0
workingDir: /workspace/dynamo
env:
- name: VLLM_USE_DEEP_GEMM
......@@ -124,7 +124,7 @@ spec:
periodSeconds: 10
timeoutSeconds: 10
failureThreshold: 600
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.0
workingDir: /workspace/dynamo
env:
- name: VLLM_USE_DEEP_GEMM
......
......@@ -16,7 +16,7 @@ spec:
replicas: 1
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/frontend:1.0.0
image: nvcr.io/nvidia/ai-dynamo/frontend:my-tag
eppConfig:
# This config uses the same disagg-profile-handler as disaggregated deployments.
# The handler's graceful degradation feature makes this possible:
......@@ -60,7 +60,7 @@ spec:
sharedMemory:
size: 20Gi
frontendSidecar:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
args:
- -m
- dynamo.frontend
......@@ -83,7 +83,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/examples/backends/vllm
replicas: 1
resources:
......
......@@ -16,7 +16,7 @@ spec:
replicas: 1
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/epp-image:1.0.0
image: nvcr.io/nvidia/ai-dynamo/epp-image:my-tag
eppConfig:
config:
plugins:
......@@ -68,7 +68,7 @@ spec:
sharedMemory:
size: 80Gi
frontendSidecar:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
args:
- -m
- dynamo.frontend
......@@ -101,7 +101,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/examples/backends/vllm
replicas: 2
resources:
......@@ -119,7 +119,7 @@ spec:
sharedMemory:
size: 80Gi
frontendSidecar:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
args:
- -m
- dynamo.frontend
......@@ -152,7 +152,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/examples/backends/vllm
replicas: 1
resources:
......
......@@ -53,7 +53,7 @@ spec:
- qwen3-235b-a22b-agg-frontend
topologyKey: kubernetes.io/hostname
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0
args:
- python3 -m dynamo.frontend --router-mode kv --http-port 8000
command:
......@@ -94,7 +94,7 @@ spec:
--max-num-tokens 8192 \
--max-seq-len 8192 \
--extra-engine-args "${ENGINE_ARGS}"
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0
workingDir: /workspace/components/backends/trtllm
volumeMounts:
- name: agg-config
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment