Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
3718da8c
Unverified
Commit
3718da8c
authored
Mar 14, 2026
by
Ben Hamm
Committed by
GitHub
Mar 14, 2026
Browse files
chore(recipes): update container image tags to 1.0.0 (#7375)
Co-authored-by:
Claude Opus 4.6
<
noreply@anthropic.com
>
parent
cec19d4d
Changes
22
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
52 additions
and
52 deletions
+52
-52
recipes/deepseek-r1/sglang/disagg-16gpu/deploy.yaml
recipes/deepseek-r1/sglang/disagg-16gpu/deploy.yaml
+3
-3
recipes/deepseek-r1/sglang/disagg-8gpu/deploy.yaml
recipes/deepseek-r1/sglang/disagg-8gpu/deploy.yaml
+3
-3
recipes/deepseek-r1/trtllm/disagg/wide_ep/gb200/deploy.yaml
recipes/deepseek-r1/trtllm/disagg/wide_ep/gb200/deploy.yaml
+3
-3
recipes/deepseek-r1/vllm/disagg/deploy_hopper_16gpu.yaml
recipes/deepseek-r1/vllm/disagg/deploy_hopper_16gpu.yaml
+3
-3
recipes/gpt-oss-120b/trtllm/agg/deploy.yaml
recipes/gpt-oss-120b/trtllm/agg/deploy.yaml
+2
-2
recipes/gpt-oss-120b/trtllm/disagg/deploy.yaml
recipes/gpt-oss-120b/trtllm/disagg/deploy.yaml
+3
-3
recipes/kimi-k2.5/trtllm/agg/baseten/deploy.yaml
recipes/kimi-k2.5/trtllm/agg/baseten/deploy.yaml
+2
-2
recipes/kimi-k2.5/trtllm/agg/deploy.yaml
recipes/kimi-k2.5/trtllm/agg/deploy.yaml
+2
-2
recipes/kimi-k2.5/trtllm/agg/nvidia/deploy-kvbm.yaml
recipes/kimi-k2.5/trtllm/agg/nvidia/deploy-kvbm.yaml
+2
-2
recipes/kimi-k2.5/trtllm/agg/nvidia/deploy.yaml
recipes/kimi-k2.5/trtllm/agg/nvidia/deploy.yaml
+2
-2
recipes/kimi-k2.5/trtllm/agg/nvidia/patch/README.md
recipes/kimi-k2.5/trtllm/agg/nvidia/patch/README.md
+1
-1
recipes/llama-3-70b/vllm/agg/deploy.yaml
recipes/llama-3-70b/vllm/agg/deploy.yaml
+2
-2
recipes/llama-3-70b/vllm/agg/gaie/deploy.yaml
recipes/llama-3-70b/vllm/agg/gaie/deploy.yaml
+3
-3
recipes/llama-3-70b/vllm/disagg-multi-node/deploy.yaml
recipes/llama-3-70b/vllm/disagg-multi-node/deploy.yaml
+3
-3
recipes/llama-3-70b/vllm/disagg-single-node/deploy.yaml
recipes/llama-3-70b/vllm/disagg-single-node/deploy.yaml
+3
-3
recipes/llama-3-70b/vllm/disagg-single-node/gaie/deploy.yaml
recipes/llama-3-70b/vllm/disagg-single-node/gaie/deploy.yaml
+5
-5
recipes/qwen3-235b-a22b-fp8/trtllm/agg/deploy.yaml
recipes/qwen3-235b-a22b-fp8/trtllm/agg/deploy.yaml
+2
-2
recipes/qwen3-235b-a22b-fp8/trtllm/disagg/deploy.yaml
recipes/qwen3-235b-a22b-fp8/trtllm/disagg/deploy.yaml
+3
-3
recipes/qwen3-32b-fp8/trtllm/agg/deploy.yaml
recipes/qwen3-32b-fp8/trtllm/agg/deploy.yaml
+2
-2
recipes/qwen3-32b-fp8/trtllm/disagg/deploy.yaml
recipes/qwen3-32b-fp8/trtllm/disagg/deploy.yaml
+3
-3
No files found.
recipes/deepseek-r1/sglang/disagg-16gpu/deploy.yaml
View file @
3718da8c
...
...
@@ -21,7 +21,7 @@ spec:
mountPoint
:
/opt/model
extraPodSpec
:
mainContainer
:
image
:
nvcr.io/nvidia/ai-dynamo/sglang-runtime:
0.8
.0
image
:
nvcr.io/nvidia/ai-dynamo/sglang-runtime:
1.0
.0
decode
:
componentType
:
worker
subComponentType
:
decode
...
...
@@ -38,7 +38,7 @@ spec:
size
:
80Gi
extraPodSpec
:
mainContainer
:
image
:
nvcr.io/nvidia/ai-dynamo/sglang-runtime:
0.8
.0
image
:
nvcr.io/nvidia/ai-dynamo/sglang-runtime:
1.0
.0
workingDir
:
/sgl-workspace/dynamo
command
:
-
python3
...
...
@@ -85,7 +85,7 @@ spec:
size
:
80Gi
extraPodSpec
:
mainContainer
:
image
:
nvcr.io/nvidia/ai-dynamo/sglang-runtime:
0.8
.0
image
:
nvcr.io/nvidia/ai-dynamo/sglang-runtime:
1.0
.0
workingDir
:
/sgl-workspace/dynamo
command
:
-
python3
...
...
recipes/deepseek-r1/sglang/disagg-8gpu/deploy.yaml
View file @
3718da8c
...
...
@@ -21,7 +21,7 @@ spec:
mountPoint
:
/opt/model
extraPodSpec
:
mainContainer
:
image
:
nvcr.io/nvidia/ai-dynamo/sglang-runtime:
0.8
.0
image
:
nvcr.io/nvidia/ai-dynamo/sglang-runtime:
1.0
.0
decode
:
componentType
:
worker
subComponentType
:
decode
...
...
@@ -36,7 +36,7 @@ spec:
size
:
80Gi
extraPodSpec
:
mainContainer
:
image
:
nvcr.io/nvidia/ai-dynamo/sglang-runtime:
0.8
.0
image
:
nvcr.io/nvidia/ai-dynamo/sglang-runtime:
1.0
.0
workingDir
:
/workspace
command
:
-
python3
...
...
@@ -80,7 +80,7 @@ spec:
size
:
80Gi
extraPodSpec
:
mainContainer
:
image
:
nvcr.io/nvidia/ai-dynamo/sglang-runtime:
0.8
.0
image
:
nvcr.io/nvidia/ai-dynamo/sglang-runtime:
1.0
.0
workingDir
:
/workspace
command
:
-
python3
...
...
recipes/deepseek-r1/trtllm/disagg/wide_ep/gb200/deploy.yaml
View file @
3718da8c
...
...
@@ -126,7 +126,7 @@ spec:
tolerations
:
[]
affinity
:
{}
mainContainer
:
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
0.8
.0
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
1.0
.0
args
:
-
|
python3 -m dynamo.frontend --http-port 8000
...
...
@@ -158,7 +158,7 @@ spec:
tolerations
:
[]
affinity
:
{}
mainContainer
:
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
0.8
.0
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
1.0
.0
workingDir
:
/workspace/components/backends/trtllm
# NOTE: If your PVCs (Persistent Volume Claims) are really slow,
# you might need to increase 'failureThreshold' below to allow more time for startup
...
...
@@ -216,7 +216,7 @@ spec:
tolerations
:
[]
affinity
:
{}
mainContainer
:
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
0.8
.0
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
1.0
.0
workingDir
:
/workspace/components/backends/trtllm
# NOTE: If your PVCs (Persistent Volume Claims) are really slow,
# you might need to increase 'failureThreshold' below to allow more time for startup
...
...
recipes/deepseek-r1/vllm/disagg/deploy_hopper_16gpu.yaml
View file @
3718da8c
...
...
@@ -26,7 +26,7 @@ spec:
periodSeconds
:
10
timeoutSeconds
:
1800
failureThreshold
:
60
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:
0.8
.0
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:
1.0
.0
decode
:
componentType
:
worker
subComponentType
:
decode
...
...
@@ -52,7 +52,7 @@ spec:
periodSeconds
:
10
timeoutSeconds
:
10
failureThreshold
:
600
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:
0.8
.0
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:
1.0
.0
workingDir
:
/workspace/dynamo
env
:
-
name
:
VLLM_USE_DEEP_GEMM
...
...
@@ -124,7 +124,7 @@ spec:
periodSeconds
:
10
timeoutSeconds
:
10
failureThreshold
:
600
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:
0.8
.0
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:
1.0
.0
workingDir
:
/workspace/dynamo
env
:
-
name
:
VLLM_USE_DEEP_GEMM
...
...
recipes/gpt-oss-120b/trtllm/agg/deploy.yaml
View file @
3718da8c
...
...
@@ -45,7 +45,7 @@ spec:
command
:
-
/bin/sh
-
-c
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
0.8
.0
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
1.0
.0
replicas
:
1
TrtllmWorker
:
componentType
:
worker
...
...
@@ -79,7 +79,7 @@ spec:
command
:
-
/bin/sh
-
-c
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
0.8
.0
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
1.0
.0
env
:
-
name
:
TRTLLM_ENABLE_PDL
value
:
"
1"
...
...
recipes/gpt-oss-120b/trtllm/disagg/deploy.yaml
View file @
3718da8c
...
...
@@ -90,7 +90,7 @@ spec:
command
:
-
/bin/sh
-
-c
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
0.7
.0
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
1.0
.0
replicas
:
1
TrtllmPrefillWorker
:
componentType
:
main
...
...
@@ -122,7 +122,7 @@ spec:
command
:
-
/bin/sh
-
-c
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
0.7
.0
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
1.0
.0
env
:
-
name
:
TRTLLM_ENABLE_PDL
value
:
"
1"
...
...
@@ -187,7 +187,7 @@ spec:
command
:
-
/bin/sh
-
-c
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
0.7
.0
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
1.0
.0
env
:
-
name
:
TRTLLM_ENABLE_PDL
value
:
"
1"
...
...
recipes/kimi-k2.5/trtllm/agg/baseten/deploy.yaml
View file @
3718da8c
...
...
@@ -51,7 +51,7 @@ spec:
command
:
-
/bin/sh
-
-c
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
my-tag
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
1.0.0
replicas
:
1
TrtllmWorker
:
componentType
:
worker
...
...
@@ -84,7 +84,7 @@ spec:
command
:
-
/bin/sh
-
-c
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
my-tag
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
1.0.0
env
:
-
name
:
TRTLLM_ENABLE_PDL
value
:
"
1"
...
...
recipes/kimi-k2.5/trtllm/agg/deploy.yaml
View file @
3718da8c
...
...
@@ -51,7 +51,7 @@ spec:
command
:
-
/bin/sh
-
-c
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
my-tag
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
1.0.0
replicas
:
1
TrtllmWorker
:
componentType
:
worker
...
...
@@ -84,7 +84,7 @@ spec:
command
:
-
/bin/sh
-
-c
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
my-tag
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
1.0.0
env
:
-
name
:
TRTLLM_ENABLE_PDL
value
:
"
1"
...
...
recipes/kimi-k2.5/trtllm/agg/nvidia/deploy-kvbm.yaml
View file @
3718da8c
...
...
@@ -55,7 +55,7 @@ spec:
command
:
-
/bin/sh
-
-c
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
my-tag
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
1.0.0
replicas
:
1
TrtllmWorker
:
componentType
:
worker
...
...
@@ -95,7 +95,7 @@ spec:
command
:
-
/bin/sh
-
-c
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
my-tag
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
1.0.0
env
:
-
name
:
TRTLLM_ENABLE_PDL
value
:
"
1"
...
...
recipes/kimi-k2.5/trtllm/agg/nvidia/deploy.yaml
View file @
3718da8c
...
...
@@ -51,7 +51,7 @@ spec:
command
:
-
/bin/sh
-
-c
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
my-tag
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
1.0.0
replicas
:
1
TrtllmWorker
:
componentType
:
worker
...
...
@@ -84,7 +84,7 @@ spec:
command
:
-
/bin/sh
-
-c
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
my-tag
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
1.0.0
env
:
-
name
:
TRTLLM_ENABLE_PDL
value
:
"
1"
...
...
recipes/kimi-k2.5/trtllm/agg/nvidia/patch/README.md
View file @
3718da8c
...
...
@@ -16,7 +16,7 @@ For example:
```
bash
./patch-container.sh nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.0.0
# produces image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
my-tag
-patched
# produces image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
1.0.0
-patched
```
If
`KimiK25ForConditionalGeneration`
is already registered, the patch is skipped. The script is idempotent -- re-running it on an already-patched image is a no-op.
...
...
recipes/llama-3-70b/vllm/agg/deploy.yaml
View file @
3718da8c
...
...
@@ -17,7 +17,7 @@ spec:
mountPoint
:
/opt/models
extraPodSpec
:
mainContainer
:
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:
0.8
.0
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:
1.0
.0
workingDir
:
/workspace/examples/backends/vllm
envs
:
-
name
:
HF_HOME
...
...
@@ -45,7 +45,7 @@ spec:
command
:
-
/bin/sh
-
-c
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:
0.8
.0
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:
1.0
.0
workingDir
:
/workspace/examples/backends/vllm
replicas
:
1
resources
:
...
...
recipes/llama-3-70b/vllm/agg/gaie/deploy.yaml
View file @
3718da8c
...
...
@@ -16,7 +16,7 @@ spec:
replicas
:
1
extraPodSpec
:
mainContainer
:
image
:
nvcr.io/nvidia/ai-dynamo/frontend:
my-tag
image
:
nvcr.io/nvidia/ai-dynamo/frontend:
1.0.0
eppConfig
:
# This config uses the same disagg-profile-handler as disaggregated deployments.
# The handler's graceful degradation feature makes this possible:
...
...
@@ -60,7 +60,7 @@ spec:
sharedMemory
:
size
:
20Gi
frontendSidecar
:
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:
my-tag
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:
1.0.0
args
:
-
-m
-
dynamo.frontend
...
...
@@ -83,7 +83,7 @@ spec:
command
:
-
/bin/sh
-
-c
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:
my-tag
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:
1.0.0
workingDir
:
/workspace/examples/backends/vllm
replicas
:
1
resources
:
...
...
recipes/llama-3-70b/vllm/disagg-multi-node/deploy.yaml
View file @
3718da8c
...
...
@@ -17,7 +17,7 @@ spec:
mountPoint
:
/opt/models
extraPodSpec
:
mainContainer
:
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:
0.8
.0
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:
1.0
.0
workingDir
:
/workspace/examples/backends/vllm
envs
:
-
name
:
HF_HOME
...
...
@@ -46,7 +46,7 @@ spec:
command
:
-
/bin/sh
-
-c
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:
0.8
.0
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:
1.0
.0
workingDir
:
/workspace/examples/backends/vllm
replicas
:
1
resources
:
...
...
@@ -77,7 +77,7 @@ spec:
command
:
-
/bin/sh
-
-c
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:
0.8
.0
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:
1.0
.0
workingDir
:
/workspace/examples/backends/vllm
replicas
:
1
resources
:
...
...
recipes/llama-3-70b/vllm/disagg-single-node/deploy.yaml
View file @
3718da8c
...
...
@@ -17,7 +17,7 @@ spec:
mountPoint
:
/opt/models
extraPodSpec
:
mainContainer
:
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:
0.8
.0
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:
1.0
.0
workingDir
:
/workspace/examples/backends/vllm
envs
:
-
name
:
HF_HOME
...
...
@@ -58,7 +58,7 @@ spec:
command
:
-
/bin/sh
-
-c
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:
0.8
.0
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:
1.0
.0
workingDir
:
/workspace/examples/backends/vllm
replicas
:
2
resources
:
...
...
@@ -101,7 +101,7 @@ spec:
command
:
-
/bin/sh
-
-c
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:
0.8
.0
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:
1.0
.0
workingDir
:
/workspace/examples/backends/vllm
replicas
:
1
resources
:
...
...
recipes/llama-3-70b/vllm/disagg-single-node/gaie/deploy.yaml
View file @
3718da8c
...
...
@@ -16,7 +16,7 @@ spec:
replicas
:
1
extraPodSpec
:
mainContainer
:
image
:
nvcr.io/nvidia/ai-dynamo/epp-image:
my-tag
image
:
nvcr.io/nvidia/ai-dynamo/epp-image:
1.0.0
eppConfig
:
config
:
plugins
:
...
...
@@ -68,7 +68,7 @@ spec:
sharedMemory
:
size
:
80Gi
frontendSidecar
:
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:
my-tag
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:
1.0.0
args
:
-
-m
-
dynamo.frontend
...
...
@@ -101,7 +101,7 @@ spec:
command
:
-
/bin/sh
-
-c
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:
my-tag
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:
1.0.0
workingDir
:
/workspace/examples/backends/vllm
replicas
:
2
resources
:
...
...
@@ -119,7 +119,7 @@ spec:
sharedMemory
:
size
:
80Gi
frontendSidecar
:
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:
my-tag
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:
1.0.0
args
:
-
-m
-
dynamo.frontend
...
...
@@ -152,7 +152,7 @@ spec:
command
:
-
/bin/sh
-
-c
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:
my-tag
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:
1.0.0
workingDir
:
/workspace/examples/backends/vllm
replicas
:
1
resources
:
...
...
recipes/qwen3-235b-a22b-fp8/trtllm/agg/deploy.yaml
View file @
3718da8c
...
...
@@ -53,7 +53,7 @@ spec:
-
qwen3-235b-a22b-agg-frontend
topologyKey
:
kubernetes.io/hostname
mainContainer
:
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
0.8
.0
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
1.0
.0
args
:
-
python3 -m dynamo.frontend --router-mode kv --http-port
8000
command
:
...
...
@@ -94,7 +94,7 @@ spec:
--max-num-tokens 8192 \
--max-seq-len 8192 \
--extra-engine-args "${ENGINE_ARGS}"
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
0.8
.0
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
1.0
.0
workingDir
:
/workspace/components/backends/trtllm
volumeMounts
:
-
name
:
agg-config
...
...
recipes/qwen3-235b-a22b-fp8/trtllm/disagg/deploy.yaml
View file @
3718da8c
...
...
@@ -75,7 +75,7 @@ spec:
-
qwen3-235b-a22b-disagg-frontend
topologyKey
:
kubernetes.io/hostname
mainContainer
:
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
0.8
.0
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
1.0
.0
args
:
-
python3 -m dynamo.frontend --router-mode kv --http-port
8000
command
:
...
...
@@ -109,7 +109,7 @@ spec:
value
:
/mnt/model-cache
-
name
:
ENGINE_ARGS
value
:
/engine_configs/prefill.yaml
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
0.8
.0
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
1.0
.0
workingDir
:
/workspace/components/backends/trtllm
command
:
-
/bin/sh
...
...
@@ -164,7 +164,7 @@ spec:
value
:
/mnt/model-cache
-
name
:
ENGINE_ARGS
value
:
/engine_configs/decode.yaml
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
0.8
.0
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
1.0
.0
workingDir
:
/workspace/components/backends/trtllm
command
:
-
/bin/sh
...
...
recipes/qwen3-32b-fp8/trtllm/agg/deploy.yaml
View file @
3718da8c
...
...
@@ -61,7 +61,7 @@ spec:
command
:
-
/bin/sh
-
-c
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
0.8
.0
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
1.0
.0
replicas
:
1
TrtllmWorker
:
componentType
:
worker
...
...
@@ -94,7 +94,7 @@ spec:
command
:
-
/bin/sh
-
-c
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
0.8
.0
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
1.0
.0
env
:
-
name
:
TRTLLM_ENABLE_PDL
value
:
"
1"
...
...
recipes/qwen3-32b-fp8/trtllm/disagg/deploy.yaml
View file @
3718da8c
...
...
@@ -218,7 +218,7 @@ spec:
command
:
-
/bin/sh
-
-c
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
0.8
.0
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
1.0
.0
replicas
:
1
TrtllmPrefillWorker
:
componentType
:
worker
...
...
@@ -253,7 +253,7 @@ spec:
command
:
-
/bin/sh
-
-c
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
0.8
.0
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
1.0
.0
env
:
-
name
:
TRTLLM_ENABLE_PDL
value
:
"
1"
...
...
@@ -313,7 +313,7 @@ spec:
command
:
-
/bin/sh
-
-c
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
0.8
.0
image
:
nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:
1.0
.0
env
:
-
name
:
TRTLLM_ENABLE_PDL
value
:
"
1"
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment