Unverified commit 183100b1, authored by Ben Hamm, committed by GitHub
Browse files

fix(recipes): revert untested/blocked recipes to pre-1.0.0 image tags (#7411)


Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
parent da810a26
...@@ -21,7 +21,7 @@ spec: ...@@ -21,7 +21,7 @@ spec:
mountPoint: /opt/model mountPoint: /opt/model
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.0.0 image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.8.0
decode: decode:
componentType: worker componentType: worker
subComponentType: decode subComponentType: decode
...@@ -38,7 +38,7 @@ spec: ...@@ -38,7 +38,7 @@ spec:
size: 80Gi size: 80Gi
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.0.0 image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.8.0
workingDir: /sgl-workspace/dynamo workingDir: /sgl-workspace/dynamo
command: command:
- python3 - python3
...@@ -85,7 +85,7 @@ spec: ...@@ -85,7 +85,7 @@ spec:
size: 80Gi size: 80Gi
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.0.0 image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.8.0
workingDir: /sgl-workspace/dynamo workingDir: /sgl-workspace/dynamo
command: command:
- python3 - python3
......
...@@ -126,7 +126,7 @@ spec: ...@@ -126,7 +126,7 @@ spec:
tolerations: [] tolerations: []
affinity: {} affinity: {}
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0 image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0
args: args:
- | - |
python3 -m dynamo.frontend --http-port 8000 python3 -m dynamo.frontend --http-port 8000
...@@ -158,7 +158,7 @@ spec: ...@@ -158,7 +158,7 @@ spec:
tolerations: [] tolerations: []
affinity: {} affinity: {}
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0 image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0
workingDir: /workspace/components/backends/trtllm workingDir: /workspace/components/backends/trtllm
# NOTE: If your PVCs (Persistent Volume Claims) are really slow, # NOTE: If your PVCs (Persistent Volume Claims) are really slow,
# you might need to increase 'failureThreshold' below to allow more time for startup # you might need to increase 'failureThreshold' below to allow more time for startup
...@@ -216,7 +216,7 @@ spec: ...@@ -216,7 +216,7 @@ spec:
tolerations: [] tolerations: []
affinity: {} affinity: {}
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0 image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0
workingDir: /workspace/components/backends/trtllm workingDir: /workspace/components/backends/trtllm
# NOTE: If your PVCs (Persistent Volume Claims) are really slow, # NOTE: If your PVCs (Persistent Volume Claims) are really slow,
# you might need to increase 'failureThreshold' below to allow more time for startup # you might need to increase 'failureThreshold' below to allow more time for startup
......
...@@ -26,7 +26,7 @@ spec: ...@@ -26,7 +26,7 @@ spec:
periodSeconds: 10 periodSeconds: 10
timeoutSeconds: 1800 timeoutSeconds: 1800
failureThreshold: 60 failureThreshold: 60
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.0
decode: decode:
componentType: worker componentType: worker
subComponentType: decode subComponentType: decode
...@@ -52,7 +52,7 @@ spec: ...@@ -52,7 +52,7 @@ spec:
periodSeconds: 10 periodSeconds: 10
timeoutSeconds: 10 timeoutSeconds: 10
failureThreshold: 600 failureThreshold: 600
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.0
workingDir: /workspace/dynamo workingDir: /workspace/dynamo
env: env:
- name: VLLM_USE_DEEP_GEMM - name: VLLM_USE_DEEP_GEMM
...@@ -124,7 +124,7 @@ spec: ...@@ -124,7 +124,7 @@ spec:
periodSeconds: 10 periodSeconds: 10
timeoutSeconds: 10 timeoutSeconds: 10
failureThreshold: 600 failureThreshold: 600
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.0
workingDir: /workspace/dynamo workingDir: /workspace/dynamo
env: env:
- name: VLLM_USE_DEEP_GEMM - name: VLLM_USE_DEEP_GEMM
......
...@@ -16,7 +16,7 @@ spec: ...@@ -16,7 +16,7 @@ spec:
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/frontend:1.0.0 image: nvcr.io/nvidia/ai-dynamo/frontend:my-tag
eppConfig: eppConfig:
# This config uses the same disagg-profile-handler as disaggregated deployments. # This config uses the same disagg-profile-handler as disaggregated deployments.
# The handler's graceful degradation feature makes this possible: # The handler's graceful degradation feature makes this possible:
...@@ -60,7 +60,7 @@ spec: ...@@ -60,7 +60,7 @@ spec:
sharedMemory: sharedMemory:
size: 20Gi size: 20Gi
frontendSidecar: frontendSidecar:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
args: args:
- -m - -m
- dynamo.frontend - dynamo.frontend
...@@ -83,7 +83,7 @@ spec: ...@@ -83,7 +83,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/examples/backends/vllm workingDir: /workspace/examples/backends/vllm
replicas: 1 replicas: 1
resources: resources:
......
...@@ -16,7 +16,7 @@ spec: ...@@ -16,7 +16,7 @@ spec:
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/epp-image:1.0.0 image: nvcr.io/nvidia/ai-dynamo/epp-image:my-tag
eppConfig: eppConfig:
config: config:
plugins: plugins:
...@@ -68,7 +68,7 @@ spec: ...@@ -68,7 +68,7 @@ spec:
sharedMemory: sharedMemory:
size: 80Gi size: 80Gi
frontendSidecar: frontendSidecar:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
args: args:
- -m - -m
- dynamo.frontend - dynamo.frontend
...@@ -101,7 +101,7 @@ spec: ...@@ -101,7 +101,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/examples/backends/vllm workingDir: /workspace/examples/backends/vllm
replicas: 2 replicas: 2
resources: resources:
...@@ -119,7 +119,7 @@ spec: ...@@ -119,7 +119,7 @@ spec:
sharedMemory: sharedMemory:
size: 80Gi size: 80Gi
frontendSidecar: frontendSidecar:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
args: args:
- -m - -m
- dynamo.frontend - dynamo.frontend
...@@ -152,7 +152,7 @@ spec: ...@@ -152,7 +152,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/examples/backends/vllm workingDir: /workspace/examples/backends/vllm
replicas: 1 replicas: 1
resources: resources:
......
...@@ -53,7 +53,7 @@ spec: ...@@ -53,7 +53,7 @@ spec:
- qwen3-235b-a22b-agg-frontend - qwen3-235b-a22b-agg-frontend
topologyKey: kubernetes.io/hostname topologyKey: kubernetes.io/hostname
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0 image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0
args: args:
- python3 -m dynamo.frontend --router-mode kv --http-port 8000 - python3 -m dynamo.frontend --router-mode kv --http-port 8000
command: command:
...@@ -94,7 +94,7 @@ spec: ...@@ -94,7 +94,7 @@ spec:
--max-num-tokens 8192 \ --max-num-tokens 8192 \
--max-seq-len 8192 \ --max-seq-len 8192 \
--extra-engine-args "${ENGINE_ARGS}" --extra-engine-args "${ENGINE_ARGS}"
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0 image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0
workingDir: /workspace/components/backends/trtllm workingDir: /workspace/components/backends/trtllm
volumeMounts: volumeMounts:
- name: agg-config - name: agg-config
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment