Unverified commit 3718da8c, authored by Ben Hamm, committed by GitHub
Browse files

chore(recipes): update container image tags to 1.0.0 (#7375)


Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
parent cec19d4d
...@@ -21,7 +21,7 @@ spec: ...@@ -21,7 +21,7 @@ spec:
mountPoint: /opt/model mountPoint: /opt/model
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.8.0 image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.0.0
decode: decode:
componentType: worker componentType: worker
subComponentType: decode subComponentType: decode
...@@ -38,7 +38,7 @@ spec: ...@@ -38,7 +38,7 @@ spec:
size: 80Gi size: 80Gi
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.8.0 image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.0.0
workingDir: /sgl-workspace/dynamo workingDir: /sgl-workspace/dynamo
command: command:
- python3 - python3
...@@ -85,7 +85,7 @@ spec: ...@@ -85,7 +85,7 @@ spec:
size: 80Gi size: 80Gi
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.8.0 image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.0.0
workingDir: /sgl-workspace/dynamo workingDir: /sgl-workspace/dynamo
command: command:
- python3 - python3
......
...@@ -21,7 +21,7 @@ spec: ...@@ -21,7 +21,7 @@ spec:
mountPoint: /opt/model mountPoint: /opt/model
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.8.0 image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.0.0
decode: decode:
componentType: worker componentType: worker
subComponentType: decode subComponentType: decode
...@@ -36,7 +36,7 @@ spec: ...@@ -36,7 +36,7 @@ spec:
size: 80Gi size: 80Gi
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.8.0 image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.0.0
workingDir: /workspace workingDir: /workspace
command: command:
- python3 - python3
...@@ -80,7 +80,7 @@ spec: ...@@ -80,7 +80,7 @@ spec:
size: 80Gi size: 80Gi
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.8.0 image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.0.0
workingDir: /workspace workingDir: /workspace
command: command:
- python3 - python3
......
...@@ -126,7 +126,7 @@ spec: ...@@ -126,7 +126,7 @@ spec:
tolerations: [] tolerations: []
affinity: {} affinity: {}
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0 image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
args: args:
- | - |
python3 -m dynamo.frontend --http-port 8000 python3 -m dynamo.frontend --http-port 8000
...@@ -158,7 +158,7 @@ spec: ...@@ -158,7 +158,7 @@ spec:
tolerations: [] tolerations: []
affinity: {} affinity: {}
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0 image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
workingDir: /workspace/components/backends/trtllm workingDir: /workspace/components/backends/trtllm
# NOTE: If your PVCs (Persistent Volume Claims) are really slow, # NOTE: If your PVCs (Persistent Volume Claims) are really slow,
# you might need to increase 'failureThreshold' below to allow more time for startup # you might need to increase 'failureThreshold' below to allow more time for startup
...@@ -216,7 +216,7 @@ spec: ...@@ -216,7 +216,7 @@ spec:
tolerations: [] tolerations: []
affinity: {} affinity: {}
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0 image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
workingDir: /workspace/components/backends/trtllm workingDir: /workspace/components/backends/trtllm
# NOTE: If your PVCs (Persistent Volume Claims) are really slow, # NOTE: If your PVCs (Persistent Volume Claims) are really slow,
# you might need to increase 'failureThreshold' below to allow more time for startup # you might need to increase 'failureThreshold' below to allow more time for startup
......
...@@ -26,7 +26,7 @@ spec: ...@@ -26,7 +26,7 @@ spec:
periodSeconds: 10 periodSeconds: 10
timeoutSeconds: 1800 timeoutSeconds: 1800
failureThreshold: 60 failureThreshold: 60
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.0 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
decode: decode:
componentType: worker componentType: worker
subComponentType: decode subComponentType: decode
...@@ -52,7 +52,7 @@ spec: ...@@ -52,7 +52,7 @@ spec:
periodSeconds: 10 periodSeconds: 10
timeoutSeconds: 10 timeoutSeconds: 10
failureThreshold: 600 failureThreshold: 600
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.0 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
workingDir: /workspace/dynamo workingDir: /workspace/dynamo
env: env:
- name: VLLM_USE_DEEP_GEMM - name: VLLM_USE_DEEP_GEMM
...@@ -124,7 +124,7 @@ spec: ...@@ -124,7 +124,7 @@ spec:
periodSeconds: 10 periodSeconds: 10
timeoutSeconds: 10 timeoutSeconds: 10
failureThreshold: 600 failureThreshold: 600
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.0 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
workingDir: /workspace/dynamo workingDir: /workspace/dynamo
env: env:
- name: VLLM_USE_DEEP_GEMM - name: VLLM_USE_DEEP_GEMM
......
...@@ -45,7 +45,7 @@ spec: ...@@ -45,7 +45,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0 image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
replicas: 1 replicas: 1
TrtllmWorker: TrtllmWorker:
componentType: worker componentType: worker
...@@ -79,7 +79,7 @@ spec: ...@@ -79,7 +79,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0 image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
env: env:
- name: TRTLLM_ENABLE_PDL - name: TRTLLM_ENABLE_PDL
value: "1" value: "1"
......
...@@ -90,7 +90,7 @@ spec: ...@@ -90,7 +90,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.7.0 image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
replicas: 1 replicas: 1
TrtllmPrefillWorker: TrtllmPrefillWorker:
componentType: main componentType: main
...@@ -122,7 +122,7 @@ spec: ...@@ -122,7 +122,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.7.0 image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
env: env:
- name: TRTLLM_ENABLE_PDL - name: TRTLLM_ENABLE_PDL
value: "1" value: "1"
...@@ -187,7 +187,7 @@ spec: ...@@ -187,7 +187,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.7.0 image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
env: env:
- name: TRTLLM_ENABLE_PDL - name: TRTLLM_ENABLE_PDL
value: "1" value: "1"
......
...@@ -51,7 +51,7 @@ spec: ...@@ -51,7 +51,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
replicas: 1 replicas: 1
TrtllmWorker: TrtllmWorker:
componentType: worker componentType: worker
...@@ -84,7 +84,7 @@ spec: ...@@ -84,7 +84,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
env: env:
- name: TRTLLM_ENABLE_PDL - name: TRTLLM_ENABLE_PDL
value: "1" value: "1"
......
...@@ -51,7 +51,7 @@ spec: ...@@ -51,7 +51,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
replicas: 1 replicas: 1
TrtllmWorker: TrtllmWorker:
componentType: worker componentType: worker
...@@ -84,7 +84,7 @@ spec: ...@@ -84,7 +84,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
env: env:
- name: TRTLLM_ENABLE_PDL - name: TRTLLM_ENABLE_PDL
value: "1" value: "1"
......
...@@ -55,7 +55,7 @@ spec: ...@@ -55,7 +55,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
replicas: 1 replicas: 1
TrtllmWorker: TrtllmWorker:
componentType: worker componentType: worker
...@@ -95,7 +95,7 @@ spec: ...@@ -95,7 +95,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
env: env:
- name: TRTLLM_ENABLE_PDL - name: TRTLLM_ENABLE_PDL
value: "1" value: "1"
......
...@@ -51,7 +51,7 @@ spec: ...@@ -51,7 +51,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
replicas: 1 replicas: 1
TrtllmWorker: TrtllmWorker:
componentType: worker componentType: worker
...@@ -84,7 +84,7 @@ spec: ...@@ -84,7 +84,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
env: env:
- name: TRTLLM_ENABLE_PDL - name: TRTLLM_ENABLE_PDL
value: "1" value: "1"
......
...@@ -16,7 +16,7 @@ For example: ...@@ -16,7 +16,7 @@ For example:
```bash ```bash
./patch-container.sh nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag ./patch-container.sh nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag
# produces image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag-patched # produces image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag-patched
``` ```
If `KimiK25ForConditionalGeneration` is already registered, the patch is skipped. The script is idempotent -- re-running it on an already-patched image is a no-op. If `KimiK25ForConditionalGeneration` is already registered, the patch is skipped. The script is idempotent -- re-running it on an already-patched image is a no-op.
......
...@@ -17,7 +17,7 @@ spec: ...@@ -17,7 +17,7 @@ spec:
mountPoint: /opt/models mountPoint: /opt/models
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.0 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
workingDir: /workspace/examples/backends/vllm workingDir: /workspace/examples/backends/vllm
envs: envs:
- name: HF_HOME - name: HF_HOME
...@@ -45,7 +45,7 @@ spec: ...@@ -45,7 +45,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.0 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
workingDir: /workspace/examples/backends/vllm workingDir: /workspace/examples/backends/vllm
replicas: 1 replicas: 1
resources: resources:
......
...@@ -16,7 +16,7 @@ spec: ...@@ -16,7 +16,7 @@ spec:
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/frontend:my-tag image: nvcr.io/nvidia/ai-dynamo/frontend:1.0.0
eppConfig: eppConfig:
# This config uses the same disagg-profile-handler as disaggregated deployments. # This config uses the same disagg-profile-handler as disaggregated deployments.
# The handler's graceful degradation feature makes this possible: # The handler's graceful degradation feature makes this possible:
...@@ -60,7 +60,7 @@ spec: ...@@ -60,7 +60,7 @@ spec:
sharedMemory: sharedMemory:
size: 20Gi size: 20Gi
frontendSidecar: frontendSidecar:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
args: args:
- -m - -m
- dynamo.frontend - dynamo.frontend
...@@ -83,7 +83,7 @@ spec: ...@@ -83,7 +83,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
workingDir: /workspace/examples/backends/vllm workingDir: /workspace/examples/backends/vllm
replicas: 1 replicas: 1
resources: resources:
......
...@@ -17,7 +17,7 @@ spec: ...@@ -17,7 +17,7 @@ spec:
mountPoint: /opt/models mountPoint: /opt/models
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.0 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
workingDir: /workspace/examples/backends/vllm workingDir: /workspace/examples/backends/vllm
envs: envs:
- name: HF_HOME - name: HF_HOME
...@@ -46,7 +46,7 @@ spec: ...@@ -46,7 +46,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.0 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
workingDir: /workspace/examples/backends/vllm workingDir: /workspace/examples/backends/vllm
replicas: 1 replicas: 1
resources: resources:
...@@ -77,7 +77,7 @@ spec: ...@@ -77,7 +77,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.0 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
workingDir: /workspace/examples/backends/vllm workingDir: /workspace/examples/backends/vllm
replicas: 1 replicas: 1
resources: resources:
......
...@@ -17,7 +17,7 @@ spec: ...@@ -17,7 +17,7 @@ spec:
mountPoint: /opt/models mountPoint: /opt/models
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.0 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
workingDir: /workspace/examples/backends/vllm workingDir: /workspace/examples/backends/vllm
envs: envs:
- name: HF_HOME - name: HF_HOME
...@@ -58,7 +58,7 @@ spec: ...@@ -58,7 +58,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.0 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
workingDir: /workspace/examples/backends/vllm workingDir: /workspace/examples/backends/vllm
replicas: 2 replicas: 2
resources: resources:
...@@ -101,7 +101,7 @@ spec: ...@@ -101,7 +101,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.8.0 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
workingDir: /workspace/examples/backends/vllm workingDir: /workspace/examples/backends/vllm
replicas: 1 replicas: 1
resources: resources:
......
...@@ -16,7 +16,7 @@ spec: ...@@ -16,7 +16,7 @@ spec:
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/epp-image:my-tag image: nvcr.io/nvidia/ai-dynamo/epp-image:1.0.0
eppConfig: eppConfig:
config: config:
plugins: plugins:
...@@ -68,7 +68,7 @@ spec: ...@@ -68,7 +68,7 @@ spec:
sharedMemory: sharedMemory:
size: 80Gi size: 80Gi
frontendSidecar: frontendSidecar:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
args: args:
- -m - -m
- dynamo.frontend - dynamo.frontend
...@@ -101,7 +101,7 @@ spec: ...@@ -101,7 +101,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
workingDir: /workspace/examples/backends/vllm workingDir: /workspace/examples/backends/vllm
replicas: 2 replicas: 2
resources: resources:
...@@ -119,7 +119,7 @@ spec: ...@@ -119,7 +119,7 @@ spec:
sharedMemory: sharedMemory:
size: 80Gi size: 80Gi
frontendSidecar: frontendSidecar:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
args: args:
- -m - -m
- dynamo.frontend - dynamo.frontend
...@@ -152,7 +152,7 @@ spec: ...@@ -152,7 +152,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.0.0
workingDir: /workspace/examples/backends/vllm workingDir: /workspace/examples/backends/vllm
replicas: 1 replicas: 1
resources: resources:
......
...@@ -53,7 +53,7 @@ spec: ...@@ -53,7 +53,7 @@ spec:
- qwen3-235b-a22b-agg-frontend - qwen3-235b-a22b-agg-frontend
topologyKey: kubernetes.io/hostname topologyKey: kubernetes.io/hostname
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0 image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
args: args:
- python3 -m dynamo.frontend --router-mode kv --http-port 8000 - python3 -m dynamo.frontend --router-mode kv --http-port 8000
command: command:
...@@ -94,7 +94,7 @@ spec: ...@@ -94,7 +94,7 @@ spec:
--max-num-tokens 8192 \ --max-num-tokens 8192 \
--max-seq-len 8192 \ --max-seq-len 8192 \
--extra-engine-args "${ENGINE_ARGS}" --extra-engine-args "${ENGINE_ARGS}"
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0 image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
workingDir: /workspace/components/backends/trtllm workingDir: /workspace/components/backends/trtllm
volumeMounts: volumeMounts:
- name: agg-config - name: agg-config
......
...@@ -75,7 +75,7 @@ spec: ...@@ -75,7 +75,7 @@ spec:
- qwen3-235b-a22b-disagg-frontend - qwen3-235b-a22b-disagg-frontend
topologyKey: kubernetes.io/hostname topologyKey: kubernetes.io/hostname
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0 image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
args: args:
- python3 -m dynamo.frontend --router-mode kv --http-port 8000 - python3 -m dynamo.frontend --router-mode kv --http-port 8000
command: command:
...@@ -109,7 +109,7 @@ spec: ...@@ -109,7 +109,7 @@ spec:
value: /mnt/model-cache value: /mnt/model-cache
- name: ENGINE_ARGS - name: ENGINE_ARGS
value: /engine_configs/prefill.yaml value: /engine_configs/prefill.yaml
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0 image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
workingDir: /workspace/components/backends/trtllm workingDir: /workspace/components/backends/trtllm
command: command:
- /bin/sh - /bin/sh
...@@ -164,7 +164,7 @@ spec: ...@@ -164,7 +164,7 @@ spec:
value: /mnt/model-cache value: /mnt/model-cache
- name: ENGINE_ARGS - name: ENGINE_ARGS
value: /engine_configs/decode.yaml value: /engine_configs/decode.yaml
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0 image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
workingDir: /workspace/components/backends/trtllm workingDir: /workspace/components/backends/trtllm
command: command:
- /bin/sh - /bin/sh
......
...@@ -61,7 +61,7 @@ spec: ...@@ -61,7 +61,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0 image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
replicas: 1 replicas: 1
TrtllmWorker: TrtllmWorker:
componentType: worker componentType: worker
...@@ -94,7 +94,7 @@ spec: ...@@ -94,7 +94,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0 image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
env: env:
- name: TRTLLM_ENABLE_PDL - name: TRTLLM_ENABLE_PDL
value: "1" value: "1"
......
...@@ -218,7 +218,7 @@ spec: ...@@ -218,7 +218,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0 image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
replicas: 1 replicas: 1
TrtllmPrefillWorker: TrtllmPrefillWorker:
componentType: worker componentType: worker
...@@ -253,7 +253,7 @@ spec: ...@@ -253,7 +253,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0 image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
env: env:
- name: TRTLLM_ENABLE_PDL - name: TRTLLM_ENABLE_PDL
value: "1" value: "1"
...@@ -313,7 +313,7 @@ spec: ...@@ -313,7 +313,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0 image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
env: env:
- name: TRTLLM_ENABLE_PDL - name: TRTLLM_ENABLE_PDL
value: "1" value: "1"
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment