Unverified commit a01b7992, authored by Julien Mancuso and committed by GitHub
Browse files

feat: use python3 in all examples (#3259)


Signed-off-by: Julien Mancuso <jmancuso@nvidia.com>
parent 02209bbb
...@@ -27,15 +27,18 @@ spec: ...@@ -27,15 +27,18 @@ spec:
image: my-registry/sglang-runtime:my-tag image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/backends/sglang workingDir: /workspace/components/backends/sglang
command: command:
- /bin/sh - python3
- -c - -m
- dynamo.sglang
args: args:
- >- - --model-path
python3 -m dynamo.sglang - Qwen/Qwen3-0.6B
--model-path Qwen/Qwen3-0.6B - --served-model-name
--served-model-name Qwen/Qwen3-0.6B - Qwen/Qwen3-0.6B
--page-size 16 - --page-size
--tp 1 - "16"
--trust-remote-code - --tp
--skip-tokenizer-init - "1"
- --trust-remote-code
- --skip-tokenizer-init
...@@ -30,14 +30,17 @@ spec: ...@@ -30,14 +30,17 @@ spec:
image: my-registry/sglang-runtime:my-tag image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/backends/sglang workingDir: /workspace/components/backends/sglang
command: command:
- /bin/sh - python3
- -c - -m
- dynamo.sglang
args: args:
- >- - --model-path
python3 -m dynamo.sglang - Qwen/Qwen3-0.6B
--model-path Qwen/Qwen3-0.6B - --served-model-name
--served-model-name Qwen/Qwen3-0.6B - Qwen/Qwen3-0.6B
--page-size 16 - --page-size
--tp 1 - "16"
--trust-remote-code - --tp
--skip-tokenizer-init - "1"
\ No newline at end of file - --trust-remote-code
- --skip-tokenizer-init
\ No newline at end of file
...@@ -30,14 +30,17 @@ spec: ...@@ -30,14 +30,17 @@ spec:
image: my-registry/sglang-runtime:my-tag image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/backends/sglang workingDir: /workspace/components/backends/sglang
command: command:
- /bin/sh - python3
- -c - -m
- dynamo.sglang
args: args:
- >- - --model-path
python3 -m dynamo.sglang - Qwen/Qwen3-0.6B
--model-path Qwen/Qwen3-0.6B - --served-model-name
--served-model-name Qwen/Qwen3-0.6B - Qwen/Qwen3-0.6B
--page-size 16 - --page-size
--tp 1 - "16"
--trust-remote-code - --tp
--skip-tokenizer-init - "1"
- --trust-remote-code
- --skip-tokenizer-init
...@@ -37,19 +37,27 @@ spec: ...@@ -37,19 +37,27 @@ spec:
mainContainer: mainContainer:
image: my-registry/sglang-runtime:my-tag image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/backends/sglang workingDir: /workspace/components/backends/sglang
command: ["sh", "-c"] command:
- python3
- -m
- dynamo.sglang
args: args:
- >- - --model-path
python3 -m dynamo.sglang - Qwen/Qwen3-0.6B
--model-path Qwen/Qwen3-0.6B - --served-model-name
--served-model-name Qwen/Qwen3-0.6B - Qwen/Qwen3-0.6B
--tp-size 8 - --tp-size
--trust-remote-code - "8"
--skip-tokenizer-init - --trust-remote-code
--disaggregation-mode decode - --skip-tokenizer-init
--disaggregation-transfer-backend nixl - --disaggregation-mode
--disaggregation-bootstrap-port 30001 - decode
--mem-fraction-static 0.82 - --disaggregation-transfer-backend
- nixl
- --disaggregation-bootstrap-port
- "30001"
- --mem-fraction-static
- "0.82"
prefill: prefill:
multinode: multinode:
nodeCount: 2 nodeCount: 2
...@@ -64,16 +72,24 @@ spec: ...@@ -64,16 +72,24 @@ spec:
mainContainer: mainContainer:
image: my-registry/sglang-runtime:my-tag image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/backends/sglang workingDir: /workspace/components/backends/sglang
command: ["sh", "-c"] command:
- python3
- -m
- dynamo.sglang
args: args:
- >- - --model-path
python3 -m dynamo.sglang - Qwen/Qwen3-0.6B
--model-path Qwen/Qwen3-0.6B - --served-model-name
--served-model-name Qwen/Qwen3-0.6B - Qwen/Qwen3-0.6B
--tp-size 8 - --tp-size
--trust-remote-code - "8"
--skip-tokenizer-init - --trust-remote-code
--disaggregation-mode prefill - --skip-tokenizer-init
--disaggregation-transfer-backend nixl - --disaggregation-mode
--disaggregation-bootstrap-port 30001 - prefill
--mem-fraction-static 0.82 - --disaggregation-transfer-backend
- nixl
- --disaggregation-bootstrap-port
- "30001"
- --mem-fraction-static
- "0.82"
...@@ -27,19 +27,24 @@ spec: ...@@ -27,19 +27,24 @@ spec:
image: my-registry/sglang-runtime:my-tag image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/backends/sglang workingDir: /workspace/components/backends/sglang
command: command:
- /bin/sh - python3
- -c - -m
- dynamo.sglang
args: args:
- >- - --model-path
python3 -m dynamo.sglang - Qwen/Qwen3-0.6B
--model-path Qwen/Qwen3-0.6B - --served-model-name
--served-model-name Qwen/Qwen3-0.6B - Qwen/Qwen3-0.6B
--page-size 16 - --page-size
--tp 1 - "16"
--trust-remote-code - --tp
--skip-tokenizer-init - "1"
--disaggregation-mode decode - --trust-remote-code
--disaggregation-transfer-backend nixl - --skip-tokenizer-init
- --disaggregation-mode
- decode
- --disaggregation-transfer-backend
- nixl
prefill: prefill:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
...@@ -54,16 +59,21 @@ spec: ...@@ -54,16 +59,21 @@ spec:
image: my-registry/sglang-runtime:my-tag image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/backends/sglang workingDir: /workspace/components/backends/sglang
command: command:
- /bin/sh - python3
- -c - -m
- dynamo.sglang
args: args:
- >- - --model-path
python3 -m dynamo.sglang - Qwen/Qwen3-0.6B
--model-path Qwen/Qwen3-0.6B - --served-model-name
--served-model-name Qwen/Qwen3-0.6B - Qwen/Qwen3-0.6B
--page-size 16 - --page-size
--tp 1 - "16"
--trust-remote-code - --tp
--skip-tokenizer-init - "1"
--disaggregation-mode prefill - --trust-remote-code
--disaggregation-transfer-backend nixl - --skip-tokenizer-init
- --disaggregation-mode
- prefill
- --disaggregation-transfer-backend
- nixl
...@@ -50,15 +50,14 @@ spec: ...@@ -50,15 +50,14 @@ spec:
image: my-registry/sglang-runtime:my-tag image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/planner/src/dynamo/planner workingDir: /workspace/components/planner/src/dynamo/planner
command: command:
- /bin/sh - python3
- -c - -m
- planner_sla
args: args:
- >- - --environment=kubernetes
python3 -m planner_sla - --backend=sglang
--environment=kubernetes - --adjustment-interval=60
--backend=sglang - --profile-results-dir=/data/profiling_results
--adjustment-interval=60
--profile-results-dir=/data/profiling_results
decode: decode:
dynamoNamespace: dynamo dynamoNamespace: dynamo
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
......
...@@ -58,11 +58,13 @@ spec: ...@@ -58,11 +58,13 @@ spec:
mountPath: /workspace/components/backends/trtllm/engine_configs mountPath: /workspace/components/backends/trtllm/engine_configs
readOnly: true readOnly: true
command: command:
- /bin/sh - python3
- -c - -m
- dynamo.trtllm
args: args:
- >- - --model-path
python3 -m dynamo.trtllm - Qwen/Qwen3-0.6B
--model-path Qwen/Qwen3-0.6B - --served-model-name
--served-model-name Qwen/Qwen3-0.6B - Qwen/Qwen3-0.6B
--extra-engine-args engine_configs/agg.yaml - --extra-engine-args
- engine_configs/agg.yaml
...@@ -27,11 +27,13 @@ spec: ...@@ -27,11 +27,13 @@ spec:
image: my-registry/trtllm-runtime:my-tag image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm workingDir: /workspace/components/backends/trtllm
command: command:
- /bin/sh - python3
- -c - -m
- dynamo.trtllm
args: args:
- >- - --model-path
python3 -m dynamo.trtllm - Qwen/Qwen3-0.6B
--model-path Qwen/Qwen3-0.6B - --served-model-name
--served-model-name Qwen/Qwen3-0.6B - Qwen/Qwen3-0.6B
--extra-engine-args engine_configs/agg.yaml - --extra-engine-args
- engine_configs/agg.yaml
...@@ -30,12 +30,14 @@ spec: ...@@ -30,12 +30,14 @@ spec:
image: my-registry/trtllm-runtime:my-tag image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm workingDir: /workspace/components/backends/trtllm
command: command:
- /bin/sh - python3
- -c - -m
- dynamo.trtllm
args: args:
- >- - --model-path
python3 -m dynamo.trtllm - Qwen/Qwen3-0.6B
--model-path Qwen/Qwen3-0.6B - --served-model-name
--served-model-name Qwen/Qwen3-0.6B - Qwen/Qwen3-0.6B
--extra-engine-args engine_configs/agg.yaml - --extra-engine-args
--publish-events-and-metrics - engine_configs/agg.yaml
- --publish-events-and-metrics
...@@ -96,10 +96,12 @@ spec: ...@@ -96,10 +96,12 @@ spec:
image: my-registry/trtllm-runtime:my-tag image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm workingDir: /workspace/components/backends/trtllm
command: command:
- /bin/sh - python3
- -c - -m
- dynamo.frontend
args: args:
- "python3 -m dynamo.frontend --http-port 8000" - --http-port
- "8000"
prefill: prefill:
pvc: pvc:
name: models name: models
...@@ -126,10 +128,20 @@ spec: ...@@ -126,10 +128,20 @@ spec:
image: my-registry/trtllm-runtime:my-tag image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm workingDir: /workspace/components/backends/trtllm
command: command:
- /bin/sh - python3
- -c - -m
- dynamo.trtllm
args: args:
- "python3 -m dynamo.trtllm --model-path Qwen/Qwen3-0.6B --served-model-name Qwen/Qwen3-0.6B --extra-engine-args engine_configs/prefill.yaml --disaggregation-mode prefill --disaggregation-strategy decode_first" - --model-path
- Qwen/Qwen3-0.6B
- --served-model-name
- Qwen/Qwen3-0.6B
- --extra-engine-args
- engine_configs/prefill.yaml
- --disaggregation-mode
- prefill
- --disaggregation-strategy
- decode_first
decode: decode:
pvc: pvc:
name: models name: models
...@@ -156,7 +168,17 @@ spec: ...@@ -156,7 +168,17 @@ spec:
image: my-registry/trtllm-runtime:my-tag image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm workingDir: /workspace/components/backends/trtllm
command: command:
- /bin/sh - python3
- -c - -m
- dynamo.trtllm
args: args:
- "python3 -m dynamo.trtllm --model-path Qwen/Qwen3-0.6B --served-model-name Qwen/Qwen3-0.6B --extra-engine-args engine_configs/decode.yaml --disaggregation-mode decode --disaggregation-strategy decode_first" - --model-path
- Qwen/Qwen3-0.6B
- --served-model-name
- Qwen/Qwen3-0.6B
- --extra-engine-args
- engine_configs/decode.yaml
- --disaggregation-mode
- decode
- --disaggregation-strategy
- decode_first
...@@ -27,10 +27,20 @@ spec: ...@@ -27,10 +27,20 @@ spec:
image: my-registry/trtllm-runtime:my-tag image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm workingDir: /workspace/components/backends/trtllm
command: command:
- /bin/sh - python3
- -c - -m
- dynamo.trtllm
args: args:
- "python3 -m dynamo.trtllm --model-path Qwen/Qwen3-0.6B --served-model-name Qwen/Qwen3-0.6B --extra-engine-args engine_configs/prefill.yaml --disaggregation-mode prefill --disaggregation-strategy decode_first" - --model-path
- Qwen/Qwen3-0.6B
- --served-model-name
- Qwen/Qwen3-0.6B
- --extra-engine-args
- engine_configs/prefill.yaml
- --disaggregation-mode
- prefill
- --disaggregation-strategy
- decode_first
TRTLLMDecodeWorker: TRTLLMDecodeWorker:
dynamoNamespace: trtllm-disagg dynamoNamespace: trtllm-disagg
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
...@@ -44,7 +54,17 @@ spec: ...@@ -44,7 +54,17 @@ spec:
image: my-registry/trtllm-runtime:my-tag image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm workingDir: /workspace/components/backends/trtllm
command: command:
- /bin/sh - python3
- -c - -m
- dynamo.trtllm
args: args:
- "python3 -m dynamo.trtllm --model-path Qwen/Qwen3-0.6B --served-model-name Qwen/Qwen3-0.6B --extra-engine-args engine_configs/decode.yaml --disaggregation-mode decode --disaggregation-strategy decode_first" - --model-path
- Qwen/Qwen3-0.6B
- --served-model-name
- Qwen/Qwen3-0.6B
- --extra-engine-args
- engine_configs/decode.yaml
- --disaggregation-mode
- decode
- --disaggregation-strategy
- decode_first
...@@ -30,10 +30,21 @@ spec: ...@@ -30,10 +30,21 @@ spec:
image: my-registry/trtllm-runtime:my-tag image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm workingDir: /workspace/components/backends/trtllm
command: command:
- /bin/sh - python3
- -c - -m
- dynamo.trtllm
args: args:
- "python3 -m dynamo.trtllm --model-path Qwen/Qwen3-0.6B --served-model-name Qwen/Qwen3-0.6B --extra-engine-args engine_configs/prefill.yaml --disaggregation-mode prefill --disaggregation-strategy prefill_first --publish-events-and-metrics" - --model-path
- Qwen/Qwen3-0.6B
- --served-model-name
- Qwen/Qwen3-0.6B
- --extra-engine-args
- engine_configs/prefill.yaml
- --disaggregation-mode
- prefill
- --disaggregation-strategy
- prefill_first
- --publish-events-and-metrics
TRTLLMDecodeWorker: TRTLLMDecodeWorker:
dynamoNamespace: trtllm-v1-disagg-router dynamoNamespace: trtllm-v1-disagg-router
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
...@@ -47,7 +58,17 @@ spec: ...@@ -47,7 +58,17 @@ spec:
image: my-registry/trtllm-runtime:my-tag image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm workingDir: /workspace/components/backends/trtllm
command: command:
- /bin/sh - python3
- -c - -m
- dynamo.trtllm
args: args:
- "python3 -m dynamo.trtllm --model-path Qwen/Qwen3-0.6B --served-model-name Qwen/Qwen3-0.6B --extra-engine-args engine_configs/decode.yaml --disaggregation-mode decode --disaggregation-strategy prefill_first" - --model-path
- Qwen/Qwen3-0.6B
- --served-model-name
- Qwen/Qwen3-0.6B
- --extra-engine-args
- engine_configs/decode.yaml
- --disaggregation-mode
- decode
- --disaggregation-strategy
- prefill_first
...@@ -27,7 +27,9 @@ spec: ...@@ -27,7 +27,9 @@ spec:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
- /bin/sh - python3
- -c - -m
- dynamo.vllm
args: args:
- python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B - --model
- Qwen/Qwen3-0.6B
...@@ -30,7 +30,9 @@ spec: ...@@ -30,7 +30,9 @@ spec:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
- /bin/sh - python3
- -c - -m
- dynamo.vllm
args: args:
- python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B - --model
- Qwen/Qwen3-0.6B
...@@ -16,10 +16,12 @@ spec: ...@@ -16,10 +16,12 @@ spec:
image: my-registry/vllm-runtime:my-tag image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
- /bin/sh - python3
- -c - -m
- dynamo.frontend
args: args:
- "python3 -m dynamo.frontend --http-port 8000" - --http-port
- "8000"
decode: decode:
dynamoNamespace: vllm-disagg dynamoNamespace: vllm-disagg
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
...@@ -35,10 +37,14 @@ spec: ...@@ -35,10 +37,14 @@ spec:
image: my-registry/vllm-runtime:my-tag image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
- /bin/sh - python3
- -c - -m
- dynamo.vllm
args: args:
- "python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --tensor-parallel-size 2" - --model
- Qwen/Qwen3-0.6B
- --tensor-parallel-size
- "2"
prefill: prefill:
dynamoNamespace: vllm-disagg dynamoNamespace: vllm-disagg
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
...@@ -54,7 +60,12 @@ spec: ...@@ -54,7 +60,12 @@ spec:
image: my-registry/vllm-runtime:my-tag image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
- /bin/sh - python3
- -c - -m
- dynamo.vllm
args: args:
- "python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --is-prefill-worker --tensor-parallel-size 2" - --model
- Qwen/Qwen3-0.6B
- --is-prefill-worker
- --tensor-parallel-size
- "2"
...@@ -27,10 +27,12 @@ spec: ...@@ -27,10 +27,12 @@ spec:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
- /bin/sh - python3
- -c - -m
- dynamo.vllm
args: args:
- "python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B" - --model
- Qwen/Qwen3-0.6B
VllmPrefillWorker: VllmPrefillWorker:
dynamoNamespace: vllm-disagg dynamoNamespace: vllm-disagg
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
...@@ -44,7 +46,10 @@ spec: ...@@ -44,7 +46,10 @@ spec:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
- /bin/sh - python3
- -c - -m
- dynamo.vllm
args: args:
- "python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --is-prefill-worker" - --model
- Qwen/Qwen3-0.6B
- --is-prefill-worker
...@@ -50,15 +50,14 @@ spec: ...@@ -50,15 +50,14 @@ spec:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/planner/src/dynamo/planner workingDir: /workspace/components/planner/src/dynamo/planner
command: command:
- /bin/sh - python3
- -c - -m
- planner_sla
args: args:
- >- - --environment=kubernetes
python3 -m planner_sla - --backend=vllm
--environment=kubernetes - --adjustment-interval=60
--backend=vllm - --profile-results-dir=/data/profiling_results
--adjustment-interval=60
--profile-results-dir=/data/profiling_results
VllmDecodeWorker: VllmDecodeWorker:
dynamoNamespace: vllm-disagg-planner dynamoNamespace: vllm-disagg-planner
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
......
...@@ -30,10 +30,12 @@ spec: ...@@ -30,10 +30,12 @@ spec:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
- /bin/sh - python3
- -c - -m
- dynamo.vllm
args: args:
- python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B - --model
- Qwen/Qwen3-0.6B
VllmPrefillWorker: VllmPrefillWorker:
dynamoNamespace: vllm-v1-disagg-router dynamoNamespace: vllm-v1-disagg-router
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
...@@ -47,7 +49,10 @@ spec: ...@@ -47,7 +49,10 @@ spec:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
- /bin/sh - python3
- -c - -m
- dynamo.vllm
args: args:
- python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --is-prefill-worker - --model
- Qwen/Qwen3-0.6B
- --is-prefill-worker
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment