Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
a01b7992
Unverified
Commit
a01b7992
authored
Sep 29, 2025
by
Julien Mancuso
Committed by
GitHub
Sep 29, 2025
Browse files
feat: use python3 in all examples (#3259)
Signed-off-by:
Julien Mancuso
<
jmancuso@nvidia.com
>
parent
02209bbb
Changes
18
Hide whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
291 additions
and
164 deletions
+291
-164
components/backends/sglang/deploy/agg.yaml
components/backends/sglang/deploy/agg.yaml
+13
-10
components/backends/sglang/deploy/agg_logging.yaml
components/backends/sglang/deploy/agg_logging.yaml
+13
-10
components/backends/sglang/deploy/agg_router.yaml
components/backends/sglang/deploy/agg_router.yaml
+13
-10
components/backends/sglang/deploy/disagg-multinode.yaml
components/backends/sglang/deploy/disagg-multinode.yaml
+40
-24
components/backends/sglang/deploy/disagg.yaml
components/backends/sglang/deploy/disagg.yaml
+34
-24
components/backends/sglang/deploy/disagg_planner.yaml
components/backends/sglang/deploy/disagg_planner.yaml
+7
-8
components/backends/trtllm/deploy/agg-with-config.yaml
components/backends/trtllm/deploy/agg-with-config.yaml
+9
-7
components/backends/trtllm/deploy/agg.yaml
components/backends/trtllm/deploy/agg.yaml
+9
-7
components/backends/trtllm/deploy/agg_router.yaml
components/backends/trtllm/deploy/agg_router.yaml
+10
-8
components/backends/trtllm/deploy/disagg-multinode.yaml
components/backends/trtllm/deploy/disagg-multinode.yaml
+31
-9
components/backends/trtllm/deploy/disagg.yaml
components/backends/trtllm/deploy/disagg.yaml
+26
-6
components/backends/trtllm/deploy/disagg_router.yaml
components/backends/trtllm/deploy/disagg_router.yaml
+27
-6
components/backends/vllm/deploy/agg.yaml
components/backends/vllm/deploy/agg.yaml
+5
-3
components/backends/vllm/deploy/agg_router.yaml
components/backends/vllm/deploy/agg_router.yaml
+5
-3
components/backends/vllm/deploy/disagg-multinode.yaml
components/backends/vllm/deploy/disagg-multinode.yaml
+20
-9
components/backends/vllm/deploy/disagg.yaml
components/backends/vllm/deploy/disagg.yaml
+11
-6
components/backends/vllm/deploy/disagg_planner.yaml
components/backends/vllm/deploy/disagg_planner.yaml
+7
-8
components/backends/vllm/deploy/disagg_router.yaml
components/backends/vllm/deploy/disagg_router.yaml
+11
-6
No files found.
components/backends/sglang/deploy/agg.yaml
View file @
a01b7992
...
@@ -27,15 +27,18 @@ spec:
...
@@ -27,15 +27,18 @@ spec:
image
:
my-registry/sglang-runtime:my-tag
image
:
my-registry/sglang-runtime:my-tag
workingDir
:
/workspace/components/backends/sglang
workingDir
:
/workspace/components/backends/sglang
command
:
command
:
-
/bin/sh
-
python3
-
-c
-
-m
-
dynamo.sglang
args
:
args
:
-
>-
-
--model-path
python3 -m dynamo.sglang
-
Qwen/Qwen3-0.6B
--model-path Qwen/Qwen3-0.6B
-
--served-model-name
--served-model-name Qwen/Qwen3-0.6B
-
Qwen/Qwen3-0.6B
--page-size 16
-
--page-size
--tp 1
-
"
16"
--trust-remote-code
-
--tp
--skip-tokenizer-init
-
"
1"
-
--trust-remote-code
-
--skip-tokenizer-init
components/backends/sglang/deploy/agg_logging.yaml
View file @
a01b7992
...
@@ -30,14 +30,17 @@ spec:
...
@@ -30,14 +30,17 @@ spec:
image
:
my-registry/sglang-runtime:my-tag
image
:
my-registry/sglang-runtime:my-tag
workingDir
:
/workspace/components/backends/sglang
workingDir
:
/workspace/components/backends/sglang
command
:
command
:
-
/bin/sh
-
python3
-
-c
-
-m
-
dynamo.sglang
args
:
args
:
-
>-
-
--model-path
python3 -m dynamo.sglang
-
Qwen/Qwen3-0.6B
--model-path Qwen/Qwen3-0.6B
-
--served-model-name
--served-model-name Qwen/Qwen3-0.6B
-
Qwen/Qwen3-0.6B
--page-size 16
-
--page-size
--tp 1
-
"
16"
--trust-remote-code
-
--tp
--skip-tokenizer-init
-
"
1"
\ No newline at end of file
-
--trust-remote-code
-
--skip-tokenizer-init
\ No newline at end of file
components/backends/sglang/deploy/agg_router.yaml
View file @
a01b7992
...
@@ -30,14 +30,17 @@ spec:
...
@@ -30,14 +30,17 @@ spec:
image
:
my-registry/sglang-runtime:my-tag
image
:
my-registry/sglang-runtime:my-tag
workingDir
:
/workspace/components/backends/sglang
workingDir
:
/workspace/components/backends/sglang
command
:
command
:
-
/bin/sh
-
python3
-
-c
-
-m
-
dynamo.sglang
args
:
args
:
-
>-
-
--model-path
python3 -m dynamo.sglang
-
Qwen/Qwen3-0.6B
--model-path Qwen/Qwen3-0.6B
-
--served-model-name
--served-model-name Qwen/Qwen3-0.6B
-
Qwen/Qwen3-0.6B
--page-size 16
-
--page-size
--tp 1
-
"
16"
--trust-remote-code
-
--tp
--skip-tokenizer-init
-
"
1"
-
--trust-remote-code
-
--skip-tokenizer-init
components/backends/sglang/deploy/disagg-multinode.yaml
View file @
a01b7992
...
@@ -37,19 +37,27 @@ spec:
...
@@ -37,19 +37,27 @@ spec:
mainContainer
:
mainContainer
:
image
:
my-registry/sglang-runtime:my-tag
image
:
my-registry/sglang-runtime:my-tag
workingDir
:
/workspace/components/backends/sglang
workingDir
:
/workspace/components/backends/sglang
command
:
[
"
sh"
,
"
-c"
]
command
:
-
python3
-
-m
-
dynamo.sglang
args
:
args
:
-
>-
-
--model-path
python3 -m dynamo.sglang
-
Qwen/Qwen3-0.6B
--model-path Qwen/Qwen3-0.6B
-
--served-model-name
--served-model-name Qwen/Qwen3-0.6B
-
Qwen/Qwen3-0.6B
--tp-size 8
-
--tp-size
--trust-remote-code
-
"
8"
--skip-tokenizer-init
-
--trust-remote-code
--disaggregation-mode decode
-
--skip-tokenizer-init
--disaggregation-transfer-backend nixl
-
--disaggregation-mode
--disaggregation-bootstrap-port 30001
-
decode
--mem-fraction-static 0.82
-
--disaggregation-transfer-backend
-
nixl
-
--disaggregation-bootstrap-port
-
"
30001"
-
--mem-fraction-static
-
"
0.82"
prefill
:
prefill
:
multinode
:
multinode
:
nodeCount
:
2
nodeCount
:
2
...
@@ -64,16 +72,24 @@ spec:
...
@@ -64,16 +72,24 @@ spec:
mainContainer
:
mainContainer
:
image
:
my-registry/sglang-runtime:my-tag
image
:
my-registry/sglang-runtime:my-tag
workingDir
:
/workspace/components/backends/sglang
workingDir
:
/workspace/components/backends/sglang
command
:
[
"
sh"
,
"
-c"
]
command
:
-
python3
-
-m
-
dynamo.sglang
args
:
args
:
-
>-
-
--model-path
python3 -m dynamo.sglang
-
Qwen/Qwen3-0.6B
--model-path Qwen/Qwen3-0.6B
-
--served-model-name
--served-model-name Qwen/Qwen3-0.6B
-
Qwen/Qwen3-0.6B
--tp-size 8
-
--tp-size
--trust-remote-code
-
"
8"
--skip-tokenizer-init
-
--trust-remote-code
--disaggregation-mode prefill
-
--skip-tokenizer-init
--disaggregation-transfer-backend nixl
-
--disaggregation-mode
--disaggregation-bootstrap-port 30001
-
prefill
--mem-fraction-static 0.82
-
--disaggregation-transfer-backend
-
nixl
-
--disaggregation-bootstrap-port
-
"
30001"
-
--mem-fraction-static
-
"
0.82"
components/backends/sglang/deploy/disagg.yaml
View file @
a01b7992
...
@@ -27,19 +27,24 @@ spec:
...
@@ -27,19 +27,24 @@ spec:
image
:
my-registry/sglang-runtime:my-tag
image
:
my-registry/sglang-runtime:my-tag
workingDir
:
/workspace/components/backends/sglang
workingDir
:
/workspace/components/backends/sglang
command
:
command
:
-
/bin/sh
-
python3
-
-c
-
-m
-
dynamo.sglang
args
:
args
:
-
>-
-
--model-path
python3 -m dynamo.sglang
-
Qwen/Qwen3-0.6B
--model-path Qwen/Qwen3-0.6B
-
--served-model-name
--served-model-name Qwen/Qwen3-0.6B
-
Qwen/Qwen3-0.6B
--page-size 16
-
--page-size
--tp 1
-
"
16"
--trust-remote-code
-
--tp
--skip-tokenizer-init
-
"
1"
--disaggregation-mode decode
-
--trust-remote-code
--disaggregation-transfer-backend nixl
-
--skip-tokenizer-init
-
--disaggregation-mode
-
decode
-
--disaggregation-transfer-backend
-
nixl
prefill
:
prefill
:
envFromSecret
:
hf-token-secret
envFromSecret
:
hf-token-secret
...
@@ -54,16 +59,21 @@ spec:
...
@@ -54,16 +59,21 @@ spec:
image
:
my-registry/sglang-runtime:my-tag
image
:
my-registry/sglang-runtime:my-tag
workingDir
:
/workspace/components/backends/sglang
workingDir
:
/workspace/components/backends/sglang
command
:
command
:
-
/bin/sh
-
python3
-
-c
-
-m
-
dynamo.sglang
args
:
args
:
-
>-
-
--model-path
python3 -m dynamo.sglang
-
Qwen/Qwen3-0.6B
--model-path Qwen/Qwen3-0.6B
-
--served-model-name
--served-model-name Qwen/Qwen3-0.6B
-
Qwen/Qwen3-0.6B
--page-size 16
-
--page-size
--tp 1
-
"
16"
--trust-remote-code
-
--tp
--skip-tokenizer-init
-
"
1"
--disaggregation-mode prefill
-
--trust-remote-code
--disaggregation-transfer-backend nixl
-
--skip-tokenizer-init
-
--disaggregation-mode
-
prefill
-
--disaggregation-transfer-backend
-
nixl
components/backends/sglang/deploy/disagg_planner.yaml
View file @
a01b7992
...
@@ -50,15 +50,14 @@ spec:
...
@@ -50,15 +50,14 @@ spec:
image
:
my-registry/sglang-runtime:my-tag
image
:
my-registry/sglang-runtime:my-tag
workingDir
:
/workspace/components/planner/src/dynamo/planner
workingDir
:
/workspace/components/planner/src/dynamo/planner
command
:
command
:
-
/bin/sh
-
python3
-
-c
-
-m
-
planner_sla
args
:
args
:
-
>-
-
--environment=kubernetes
python3 -m planner_sla
-
--backend=sglang
--environment=kubernetes
-
--adjustment-interval=60
--backend=sglang
-
--profile-results-dir=/data/profiling_results
--adjustment-interval=60
--profile-results-dir=/data/profiling_results
decode
:
decode
:
dynamoNamespace
:
dynamo
dynamoNamespace
:
dynamo
envFromSecret
:
hf-token-secret
envFromSecret
:
hf-token-secret
...
...
components/backends/trtllm/deploy/agg-with-config.yaml
View file @
a01b7992
...
@@ -58,11 +58,13 @@ spec:
...
@@ -58,11 +58,13 @@ spec:
mountPath
:
/workspace/components/backends/trtllm/engine_configs
mountPath
:
/workspace/components/backends/trtllm/engine_configs
readOnly
:
true
readOnly
:
true
command
:
command
:
-
/bin/sh
-
python3
-
-c
-
-m
-
dynamo.trtllm
args
:
args
:
-
>-
-
--model-path
python3 -m dynamo.trtllm
-
Qwen/Qwen3-0.6B
--model-path Qwen/Qwen3-0.6B
-
--served-model-name
--served-model-name Qwen/Qwen3-0.6B
-
Qwen/Qwen3-0.6B
--extra-engine-args engine_configs/agg.yaml
-
--extra-engine-args
-
engine_configs/agg.yaml
components/backends/trtllm/deploy/agg.yaml
View file @
a01b7992
...
@@ -27,11 +27,13 @@ spec:
...
@@ -27,11 +27,13 @@ spec:
image
:
my-registry/trtllm-runtime:my-tag
image
:
my-registry/trtllm-runtime:my-tag
workingDir
:
/workspace/components/backends/trtllm
workingDir
:
/workspace/components/backends/trtllm
command
:
command
:
-
/bin/sh
-
python3
-
-c
-
-m
-
dynamo.trtllm
args
:
args
:
-
>-
-
--model-path
python3 -m dynamo.trtllm
-
Qwen/Qwen3-0.6B
--model-path Qwen/Qwen3-0.6B
-
--served-model-name
--served-model-name Qwen/Qwen3-0.6B
-
Qwen/Qwen3-0.6B
--extra-engine-args engine_configs/agg.yaml
-
--extra-engine-args
-
engine_configs/agg.yaml
components/backends/trtllm/deploy/agg_router.yaml
View file @
a01b7992
...
@@ -30,12 +30,14 @@ spec:
...
@@ -30,12 +30,14 @@ spec:
image
:
my-registry/trtllm-runtime:my-tag
image
:
my-registry/trtllm-runtime:my-tag
workingDir
:
/workspace/components/backends/trtllm
workingDir
:
/workspace/components/backends/trtllm
command
:
command
:
-
/bin/sh
-
python3
-
-c
-
-m
-
dynamo.trtllm
args
:
args
:
-
>-
-
--model-path
python3 -m dynamo.trtllm
-
Qwen/Qwen3-0.6B
--model-path Qwen/Qwen3-0.6B
-
--served-model-name
--served-model-name Qwen/Qwen3-0.6B
-
Qwen/Qwen3-0.6B
--extra-engine-args engine_configs/agg.yaml
-
--extra-engine-args
--publish-events-and-metrics
-
engine_configs/agg.yaml
-
--publish-events-and-metrics
components/backends/trtllm/deploy/disagg-multinode.yaml
View file @
a01b7992
...
@@ -96,10 +96,12 @@ spec:
...
@@ -96,10 +96,12 @@ spec:
image
:
my-registry/trtllm-runtime:my-tag
image
:
my-registry/trtllm-runtime:my-tag
workingDir
:
/workspace/components/backends/trtllm
workingDir
:
/workspace/components/backends/trtllm
command
:
command
:
-
/bin/sh
-
python3
-
-c
-
-m
-
dynamo.frontend
args
:
args
:
-
"
python3
-m
dynamo.frontend
--http-port
8000"
-
--http-port
-
"
8000"
prefill
:
prefill
:
pvc
:
pvc
:
name
:
models
name
:
models
...
@@ -126,10 +128,20 @@ spec:
...
@@ -126,10 +128,20 @@ spec:
image
:
my-registry/trtllm-runtime:my-tag
image
:
my-registry/trtllm-runtime:my-tag
workingDir
:
/workspace/components/backends/trtllm
workingDir
:
/workspace/components/backends/trtllm
command
:
command
:
-
/bin/sh
-
python3
-
-c
-
-m
-
dynamo.trtllm
args
:
args
:
-
"
python3
-m
dynamo.trtllm
--model-path
Qwen/Qwen3-0.6B
--served-model-name
Qwen/Qwen3-0.6B
--extra-engine-args
engine_configs/prefill.yaml
--disaggregation-mode
prefill
--disaggregation-strategy
decode_first"
-
--model-path
-
Qwen/Qwen3-0.6B
-
--served-model-name
-
Qwen/Qwen3-0.6B
-
--extra-engine-args
-
engine_configs/prefill.yaml
-
--disaggregation-mode
-
prefill
-
--disaggregation-strategy
-
decode_first
decode
:
decode
:
pvc
:
pvc
:
name
:
models
name
:
models
...
@@ -156,7 +168,17 @@ spec:
...
@@ -156,7 +168,17 @@ spec:
image
:
my-registry/trtllm-runtime:my-tag
image
:
my-registry/trtllm-runtime:my-tag
workingDir
:
/workspace/components/backends/trtllm
workingDir
:
/workspace/components/backends/trtllm
command
:
command
:
-
/bin/sh
-
python3
-
-c
-
-m
-
dynamo.trtllm
args
:
args
:
-
"
python3
-m
dynamo.trtllm
--model-path
Qwen/Qwen3-0.6B
--served-model-name
Qwen/Qwen3-0.6B
--extra-engine-args
engine_configs/decode.yaml
--disaggregation-mode
decode
--disaggregation-strategy
decode_first"
-
--model-path
-
Qwen/Qwen3-0.6B
-
--served-model-name
-
Qwen/Qwen3-0.6B
-
--extra-engine-args
-
engine_configs/decode.yaml
-
--disaggregation-mode
-
decode
-
--disaggregation-strategy
-
decode_first
components/backends/trtllm/deploy/disagg.yaml
View file @
a01b7992
...
@@ -27,10 +27,20 @@ spec:
...
@@ -27,10 +27,20 @@ spec:
image
:
my-registry/trtllm-runtime:my-tag
image
:
my-registry/trtllm-runtime:my-tag
workingDir
:
/workspace/components/backends/trtllm
workingDir
:
/workspace/components/backends/trtllm
command
:
command
:
-
/bin/sh
-
python3
-
-c
-
-m
-
dynamo.trtllm
args
:
args
:
-
"
python3
-m
dynamo.trtllm
--model-path
Qwen/Qwen3-0.6B
--served-model-name
Qwen/Qwen3-0.6B
--extra-engine-args
engine_configs/prefill.yaml
--disaggregation-mode
prefill
--disaggregation-strategy
decode_first"
-
--model-path
-
Qwen/Qwen3-0.6B
-
--served-model-name
-
Qwen/Qwen3-0.6B
-
--extra-engine-args
-
engine_configs/prefill.yaml
-
--disaggregation-mode
-
prefill
-
--disaggregation-strategy
-
decode_first
TRTLLMDecodeWorker
:
TRTLLMDecodeWorker
:
dynamoNamespace
:
trtllm-disagg
dynamoNamespace
:
trtllm-disagg
envFromSecret
:
hf-token-secret
envFromSecret
:
hf-token-secret
...
@@ -44,7 +54,17 @@ spec:
...
@@ -44,7 +54,17 @@ spec:
image
:
my-registry/trtllm-runtime:my-tag
image
:
my-registry/trtllm-runtime:my-tag
workingDir
:
/workspace/components/backends/trtllm
workingDir
:
/workspace/components/backends/trtllm
command
:
command
:
-
/bin/sh
-
python3
-
-c
-
-m
-
dynamo.trtllm
args
:
args
:
-
"
python3
-m
dynamo.trtllm
--model-path
Qwen/Qwen3-0.6B
--served-model-name
Qwen/Qwen3-0.6B
--extra-engine-args
engine_configs/decode.yaml
--disaggregation-mode
decode
--disaggregation-strategy
decode_first"
-
--model-path
-
Qwen/Qwen3-0.6B
-
--served-model-name
-
Qwen/Qwen3-0.6B
-
--extra-engine-args
-
engine_configs/decode.yaml
-
--disaggregation-mode
-
decode
-
--disaggregation-strategy
-
decode_first
components/backends/trtllm/deploy/disagg_router.yaml
View file @
a01b7992
...
@@ -30,10 +30,21 @@ spec:
...
@@ -30,10 +30,21 @@ spec:
image
:
my-registry/trtllm-runtime:my-tag
image
:
my-registry/trtllm-runtime:my-tag
workingDir
:
/workspace/components/backends/trtllm
workingDir
:
/workspace/components/backends/trtllm
command
:
command
:
-
/bin/sh
-
python3
-
-c
-
-m
-
dynamo.trtllm
args
:
args
:
-
"
python3
-m
dynamo.trtllm
--model-path
Qwen/Qwen3-0.6B
--served-model-name
Qwen/Qwen3-0.6B
--extra-engine-args
engine_configs/prefill.yaml
--disaggregation-mode
prefill
--disaggregation-strategy
prefill_first
--publish-events-and-metrics"
-
--model-path
-
Qwen/Qwen3-0.6B
-
--served-model-name
-
Qwen/Qwen3-0.6B
-
--extra-engine-args
-
engine_configs/prefill.yaml
-
--disaggregation-mode
-
prefill
-
--disaggregation-strategy
-
prefill_first
-
--publish-events-and-metrics
TRTLLMDecodeWorker
:
TRTLLMDecodeWorker
:
dynamoNamespace
:
trtllm-v1-disagg-router
dynamoNamespace
:
trtllm-v1-disagg-router
envFromSecret
:
hf-token-secret
envFromSecret
:
hf-token-secret
...
@@ -47,7 +58,17 @@ spec:
...
@@ -47,7 +58,17 @@ spec:
image
:
my-registry/trtllm-runtime:my-tag
image
:
my-registry/trtllm-runtime:my-tag
workingDir
:
/workspace/components/backends/trtllm
workingDir
:
/workspace/components/backends/trtllm
command
:
command
:
-
/bin/sh
-
python3
-
-c
-
-m
-
dynamo.trtllm
args
:
args
:
-
"
python3
-m
dynamo.trtllm
--model-path
Qwen/Qwen3-0.6B
--served-model-name
Qwen/Qwen3-0.6B
--extra-engine-args
engine_configs/decode.yaml
--disaggregation-mode
decode
--disaggregation-strategy
prefill_first"
-
--model-path
-
Qwen/Qwen3-0.6B
-
--served-model-name
-
Qwen/Qwen3-0.6B
-
--extra-engine-args
-
engine_configs/decode.yaml
-
--disaggregation-mode
-
decode
-
--disaggregation-strategy
-
prefill_first
components/backends/vllm/deploy/agg.yaml
View file @
a01b7992
...
@@ -27,7 +27,9 @@ spec:
...
@@ -27,7 +27,9 @@ spec:
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir
:
/workspace/components/backends/vllm
workingDir
:
/workspace/components/backends/vllm
command
:
command
:
-
/bin/sh
-
python3
-
-c
-
-m
-
dynamo.vllm
args
:
args
:
-
python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B
-
--model
-
Qwen/Qwen3-0.6B
components/backends/vllm/deploy/agg_router.yaml
View file @
a01b7992
...
@@ -30,7 +30,9 @@ spec:
...
@@ -30,7 +30,9 @@ spec:
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir
:
/workspace/components/backends/vllm
workingDir
:
/workspace/components/backends/vllm
command
:
command
:
-
/bin/sh
-
python3
-
-c
-
-m
-
dynamo.vllm
args
:
args
:
-
python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B
-
--model
-
Qwen/Qwen3-0.6B
components/backends/vllm/deploy/disagg-multinode.yaml
View file @
a01b7992
...
@@ -16,10 +16,12 @@ spec:
...
@@ -16,10 +16,12 @@ spec:
image
:
my-registry/vllm-runtime:my-tag
image
:
my-registry/vllm-runtime:my-tag
workingDir
:
/workspace/components/backends/vllm
workingDir
:
/workspace/components/backends/vllm
command
:
command
:
-
/bin/sh
-
python3
-
-c
-
-m
-
dynamo.frontend
args
:
args
:
-
"
python3
-m
dynamo.frontend
--http-port
8000"
-
--http-port
-
"
8000"
decode
:
decode
:
dynamoNamespace
:
vllm-disagg
dynamoNamespace
:
vllm-disagg
envFromSecret
:
hf-token-secret
envFromSecret
:
hf-token-secret
...
@@ -35,10 +37,14 @@ spec:
...
@@ -35,10 +37,14 @@ spec:
image
:
my-registry/vllm-runtime:my-tag
image
:
my-registry/vllm-runtime:my-tag
workingDir
:
/workspace/components/backends/vllm
workingDir
:
/workspace/components/backends/vllm
command
:
command
:
-
/bin/sh
-
python3
-
-c
-
-m
-
dynamo.vllm
args
:
args
:
-
"
python3
-m
dynamo.vllm
--model
Qwen/Qwen3-0.6B
--tensor-parallel-size
2"
-
--model
-
Qwen/Qwen3-0.6B
-
--tensor-parallel-size
-
"
2"
prefill
:
prefill
:
dynamoNamespace
:
vllm-disagg
dynamoNamespace
:
vllm-disagg
envFromSecret
:
hf-token-secret
envFromSecret
:
hf-token-secret
...
@@ -54,7 +60,12 @@ spec:
...
@@ -54,7 +60,12 @@ spec:
image
:
my-registry/vllm-runtime:my-tag
image
:
my-registry/vllm-runtime:my-tag
workingDir
:
/workspace/components/backends/vllm
workingDir
:
/workspace/components/backends/vllm
command
:
command
:
-
/bin/sh
-
python3
-
-c
-
-m
-
dynamo.vllm
args
:
args
:
-
"
python3
-m
dynamo.vllm
--model
Qwen/Qwen3-0.6B
--is-prefill-worker
--tensor-parallel-size
2"
-
--model
-
Qwen/Qwen3-0.6B
-
--is-prefill-worker
-
--tensor-parallel-size
-
"
2"
components/backends/vllm/deploy/disagg.yaml
View file @
a01b7992
...
@@ -27,10 +27,12 @@ spec:
...
@@ -27,10 +27,12 @@ spec:
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir
:
/workspace/components/backends/vllm
workingDir
:
/workspace/components/backends/vllm
command
:
command
:
-
/bin/sh
-
python3
-
-c
-
-m
-
dynamo.vllm
args
:
args
:
-
"
python3
-m
dynamo.vllm
--model
Qwen/Qwen3-0.6B"
-
--model
-
Qwen/Qwen3-0.6B
VllmPrefillWorker
:
VllmPrefillWorker
:
dynamoNamespace
:
vllm-disagg
dynamoNamespace
:
vllm-disagg
envFromSecret
:
hf-token-secret
envFromSecret
:
hf-token-secret
...
@@ -44,7 +46,10 @@ spec:
...
@@ -44,7 +46,10 @@ spec:
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir
:
/workspace/components/backends/vllm
workingDir
:
/workspace/components/backends/vllm
command
:
command
:
-
/bin/sh
-
python3
-
-c
-
-m
-
dynamo.vllm
args
:
args
:
-
"
python3
-m
dynamo.vllm
--model
Qwen/Qwen3-0.6B
--is-prefill-worker"
-
--model
-
Qwen/Qwen3-0.6B
-
--is-prefill-worker
components/backends/vllm/deploy/disagg_planner.yaml
View file @
a01b7992
...
@@ -50,15 +50,14 @@ spec:
...
@@ -50,15 +50,14 @@ spec:
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir
:
/workspace/components/planner/src/dynamo/planner
workingDir
:
/workspace/components/planner/src/dynamo/planner
command
:
command
:
-
/bin/sh
-
python3
-
-c
-
-m
-
planner_sla
args
:
args
:
-
>-
-
--environment=kubernetes
python3 -m planner_sla
-
--backend=vllm
--environment=kubernetes
-
--adjustment-interval=60
--backend=vllm
-
--profile-results-dir=/data/profiling_results
--adjustment-interval=60
--profile-results-dir=/data/profiling_results
VllmDecodeWorker
:
VllmDecodeWorker
:
dynamoNamespace
:
vllm-disagg-planner
dynamoNamespace
:
vllm-disagg-planner
envFromSecret
:
hf-token-secret
envFromSecret
:
hf-token-secret
...
...
components/backends/vllm/deploy/disagg_router.yaml
View file @
a01b7992
...
@@ -30,10 +30,12 @@ spec:
...
@@ -30,10 +30,12 @@ spec:
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir
:
/workspace/components/backends/vllm
workingDir
:
/workspace/components/backends/vllm
command
:
command
:
-
/bin/sh
-
python3
-
-c
-
-m
-
dynamo.vllm
args
:
args
:
-
python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B
-
--model
-
Qwen/Qwen3-0.6B
VllmPrefillWorker
:
VllmPrefillWorker
:
dynamoNamespace
:
vllm-v1-disagg-router
dynamoNamespace
:
vllm-v1-disagg-router
envFromSecret
:
hf-token-secret
envFromSecret
:
hf-token-secret
...
@@ -47,7 +49,10 @@ spec:
...
@@ -47,7 +49,10 @@ spec:
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
image
:
nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir
:
/workspace/components/backends/vllm
workingDir
:
/workspace/components/backends/vllm
command
:
command
:
-
/bin/sh
-
python3
-
-c
-
-m
-
dynamo.vllm
args
:
args
:
-
python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --is-prefill-worker
-
--model
-
Qwen/Qwen3-0.6B
-
--is-prefill-worker
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment