Unverified commit a01b7992, authored by Julien Mancuso, committed by GitHub
Browse files

feat: use python3 in all examples (#3259)


Signed-off-by: Julien Mancuso <jmancuso@nvidia.com>
parent 02209bbb
......@@ -27,15 +27,18 @@ spec:
image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/backends/sglang
command:
- /bin/sh
- -c
- python3
- -m
- dynamo.sglang
args:
- >-
python3 -m dynamo.sglang
--model-path Qwen/Qwen3-0.6B
--served-model-name Qwen/Qwen3-0.6B
--page-size 16
--tp 1
--trust-remote-code
--skip-tokenizer-init
- --model-path
- Qwen/Qwen3-0.6B
- --served-model-name
- Qwen/Qwen3-0.6B
- --page-size
- "16"
- --tp
- "1"
- --trust-remote-code
- --skip-tokenizer-init
......@@ -30,14 +30,17 @@ spec:
image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/backends/sglang
command:
- /bin/sh
- -c
- python3
- -m
- dynamo.sglang
args:
- >-
python3 -m dynamo.sglang
--model-path Qwen/Qwen3-0.6B
--served-model-name Qwen/Qwen3-0.6B
--page-size 16
--tp 1
--trust-remote-code
--skip-tokenizer-init
\ No newline at end of file
- --model-path
- Qwen/Qwen3-0.6B
- --served-model-name
- Qwen/Qwen3-0.6B
- --page-size
- "16"
- --tp
- "1"
- --trust-remote-code
- --skip-tokenizer-init
\ No newline at end of file
......@@ -30,14 +30,17 @@ spec:
image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/backends/sglang
command:
- /bin/sh
- -c
- python3
- -m
- dynamo.sglang
args:
- >-
python3 -m dynamo.sglang
--model-path Qwen/Qwen3-0.6B
--served-model-name Qwen/Qwen3-0.6B
--page-size 16
--tp 1
--trust-remote-code
--skip-tokenizer-init
- --model-path
- Qwen/Qwen3-0.6B
- --served-model-name
- Qwen/Qwen3-0.6B
- --page-size
- "16"
- --tp
- "1"
- --trust-remote-code
- --skip-tokenizer-init
......@@ -37,19 +37,27 @@ spec:
mainContainer:
image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/backends/sglang
command: ["sh", "-c"]
command:
- python3
- -m
- dynamo.sglang
args:
- >-
python3 -m dynamo.sglang
--model-path Qwen/Qwen3-0.6B
--served-model-name Qwen/Qwen3-0.6B
--tp-size 8
--trust-remote-code
--skip-tokenizer-init
--disaggregation-mode decode
--disaggregation-transfer-backend nixl
--disaggregation-bootstrap-port 30001
--mem-fraction-static 0.82
- --model-path
- Qwen/Qwen3-0.6B
- --served-model-name
- Qwen/Qwen3-0.6B
- --tp-size
- "8"
- --trust-remote-code
- --skip-tokenizer-init
- --disaggregation-mode
- decode
- --disaggregation-transfer-backend
- nixl
- --disaggregation-bootstrap-port
- "30001"
- --mem-fraction-static
- "0.82"
prefill:
multinode:
nodeCount: 2
......@@ -64,16 +72,24 @@ spec:
mainContainer:
image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/backends/sglang
command: ["sh", "-c"]
command:
- python3
- -m
- dynamo.sglang
args:
- >-
python3 -m dynamo.sglang
--model-path Qwen/Qwen3-0.6B
--served-model-name Qwen/Qwen3-0.6B
--tp-size 8
--trust-remote-code
--skip-tokenizer-init
--disaggregation-mode prefill
--disaggregation-transfer-backend nixl
--disaggregation-bootstrap-port 30001
--mem-fraction-static 0.82
- --model-path
- Qwen/Qwen3-0.6B
- --served-model-name
- Qwen/Qwen3-0.6B
- --tp-size
- "8"
- --trust-remote-code
- --skip-tokenizer-init
- --disaggregation-mode
- prefill
- --disaggregation-transfer-backend
- nixl
- --disaggregation-bootstrap-port
- "30001"
- --mem-fraction-static
- "0.82"
......@@ -27,19 +27,24 @@ spec:
image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/backends/sglang
command:
- /bin/sh
- -c
- python3
- -m
- dynamo.sglang
args:
- >-
python3 -m dynamo.sglang
--model-path Qwen/Qwen3-0.6B
--served-model-name Qwen/Qwen3-0.6B
--page-size 16
--tp 1
--trust-remote-code
--skip-tokenizer-init
--disaggregation-mode decode
--disaggregation-transfer-backend nixl
- --model-path
- Qwen/Qwen3-0.6B
- --served-model-name
- Qwen/Qwen3-0.6B
- --page-size
- "16"
- --tp
- "1"
- --trust-remote-code
- --skip-tokenizer-init
- --disaggregation-mode
- decode
- --disaggregation-transfer-backend
- nixl
prefill:
envFromSecret: hf-token-secret
......@@ -54,16 +59,21 @@ spec:
image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/backends/sglang
command:
- /bin/sh
- -c
- python3
- -m
- dynamo.sglang
args:
- >-
python3 -m dynamo.sglang
--model-path Qwen/Qwen3-0.6B
--served-model-name Qwen/Qwen3-0.6B
--page-size 16
--tp 1
--trust-remote-code
--skip-tokenizer-init
--disaggregation-mode prefill
--disaggregation-transfer-backend nixl
- --model-path
- Qwen/Qwen3-0.6B
- --served-model-name
- Qwen/Qwen3-0.6B
- --page-size
- "16"
- --tp
- "1"
- --trust-remote-code
- --skip-tokenizer-init
- --disaggregation-mode
- prefill
- --disaggregation-transfer-backend
- nixl
......@@ -50,15 +50,14 @@ spec:
image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/planner/src/dynamo/planner
command:
- /bin/sh
- -c
- python3
- -m
- planner_sla
args:
- >-
python3 -m planner_sla
--environment=kubernetes
--backend=sglang
--adjustment-interval=60
--profile-results-dir=/data/profiling_results
- --environment=kubernetes
- --backend=sglang
- --adjustment-interval=60
- --profile-results-dir=/data/profiling_results
decode:
dynamoNamespace: dynamo
envFromSecret: hf-token-secret
......
......@@ -58,11 +58,13 @@ spec:
mountPath: /workspace/components/backends/trtllm/engine_configs
readOnly: true
command:
- /bin/sh
- -c
- python3
- -m
- dynamo.trtllm
args:
- >-
python3 -m dynamo.trtllm
--model-path Qwen/Qwen3-0.6B
--served-model-name Qwen/Qwen3-0.6B
--extra-engine-args engine_configs/agg.yaml
- --model-path
- Qwen/Qwen3-0.6B
- --served-model-name
- Qwen/Qwen3-0.6B
- --extra-engine-args
- engine_configs/agg.yaml
......@@ -27,11 +27,13 @@ spec:
image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm
command:
- /bin/sh
- -c
- python3
- -m
- dynamo.trtllm
args:
- >-
python3 -m dynamo.trtllm
--model-path Qwen/Qwen3-0.6B
--served-model-name Qwen/Qwen3-0.6B
--extra-engine-args engine_configs/agg.yaml
- --model-path
- Qwen/Qwen3-0.6B
- --served-model-name
- Qwen/Qwen3-0.6B
- --extra-engine-args
- engine_configs/agg.yaml
......@@ -30,12 +30,14 @@ spec:
image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm
command:
- /bin/sh
- -c
- python3
- -m
- dynamo.trtllm
args:
- >-
python3 -m dynamo.trtllm
--model-path Qwen/Qwen3-0.6B
--served-model-name Qwen/Qwen3-0.6B
--extra-engine-args engine_configs/agg.yaml
--publish-events-and-metrics
- --model-path
- Qwen/Qwen3-0.6B
- --served-model-name
- Qwen/Qwen3-0.6B
- --extra-engine-args
- engine_configs/agg.yaml
- --publish-events-and-metrics
......@@ -96,10 +96,12 @@ spec:
image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm
command:
- /bin/sh
- -c
- python3
- -m
- dynamo.frontend
args:
- "python3 -m dynamo.frontend --http-port 8000"
- --http-port
- "8000"
prefill:
pvc:
name: models
......@@ -126,10 +128,20 @@ spec:
image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm
command:
- /bin/sh
- -c
- python3
- -m
- dynamo.trtllm
args:
- "python3 -m dynamo.trtllm --model-path Qwen/Qwen3-0.6B --served-model-name Qwen/Qwen3-0.6B --extra-engine-args engine_configs/prefill.yaml --disaggregation-mode prefill --disaggregation-strategy decode_first"
- --model-path
- Qwen/Qwen3-0.6B
- --served-model-name
- Qwen/Qwen3-0.6B
- --extra-engine-args
- engine_configs/prefill.yaml
- --disaggregation-mode
- prefill
- --disaggregation-strategy
- decode_first
decode:
pvc:
name: models
......@@ -156,7 +168,17 @@ spec:
image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm
command:
- /bin/sh
- -c
- python3
- -m
- dynamo.trtllm
args:
- "python3 -m dynamo.trtllm --model-path Qwen/Qwen3-0.6B --served-model-name Qwen/Qwen3-0.6B --extra-engine-args engine_configs/decode.yaml --disaggregation-mode decode --disaggregation-strategy decode_first"
- --model-path
- Qwen/Qwen3-0.6B
- --served-model-name
- Qwen/Qwen3-0.6B
- --extra-engine-args
- engine_configs/decode.yaml
- --disaggregation-mode
- decode
- --disaggregation-strategy
- decode_first
......@@ -27,10 +27,20 @@ spec:
image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm
command:
- /bin/sh
- -c
- python3
- -m
- dynamo.trtllm
args:
- "python3 -m dynamo.trtllm --model-path Qwen/Qwen3-0.6B --served-model-name Qwen/Qwen3-0.6B --extra-engine-args engine_configs/prefill.yaml --disaggregation-mode prefill --disaggregation-strategy decode_first"
- --model-path
- Qwen/Qwen3-0.6B
- --served-model-name
- Qwen/Qwen3-0.6B
- --extra-engine-args
- engine_configs/prefill.yaml
- --disaggregation-mode
- prefill
- --disaggregation-strategy
- decode_first
TRTLLMDecodeWorker:
dynamoNamespace: trtllm-disagg
envFromSecret: hf-token-secret
......@@ -44,7 +54,17 @@ spec:
image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm
command:
- /bin/sh
- -c
- python3
- -m
- dynamo.trtllm
args:
- "python3 -m dynamo.trtllm --model-path Qwen/Qwen3-0.6B --served-model-name Qwen/Qwen3-0.6B --extra-engine-args engine_configs/decode.yaml --disaggregation-mode decode --disaggregation-strategy decode_first"
- --model-path
- Qwen/Qwen3-0.6B
- --served-model-name
- Qwen/Qwen3-0.6B
- --extra-engine-args
- engine_configs/decode.yaml
- --disaggregation-mode
- decode
- --disaggregation-strategy
- decode_first
......@@ -30,10 +30,21 @@ spec:
image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm
command:
- /bin/sh
- -c
- python3
- -m
- dynamo.trtllm
args:
- "python3 -m dynamo.trtllm --model-path Qwen/Qwen3-0.6B --served-model-name Qwen/Qwen3-0.6B --extra-engine-args engine_configs/prefill.yaml --disaggregation-mode prefill --disaggregation-strategy prefill_first --publish-events-and-metrics"
- --model-path
- Qwen/Qwen3-0.6B
- --served-model-name
- Qwen/Qwen3-0.6B
- --extra-engine-args
- engine_configs/prefill.yaml
- --disaggregation-mode
- prefill
- --disaggregation-strategy
- prefill_first
- --publish-events-and-metrics
TRTLLMDecodeWorker:
dynamoNamespace: trtllm-v1-disagg-router
envFromSecret: hf-token-secret
......@@ -47,7 +58,17 @@ spec:
image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm
command:
- /bin/sh
- -c
- python3
- -m
- dynamo.trtllm
args:
- "python3 -m dynamo.trtllm --model-path Qwen/Qwen3-0.6B --served-model-name Qwen/Qwen3-0.6B --extra-engine-args engine_configs/decode.yaml --disaggregation-mode decode --disaggregation-strategy prefill_first"
- --model-path
- Qwen/Qwen3-0.6B
- --served-model-name
- Qwen/Qwen3-0.6B
- --extra-engine-args
- engine_configs/decode.yaml
- --disaggregation-mode
- decode
- --disaggregation-strategy
- prefill_first
......@@ -27,7 +27,9 @@ spec:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
- python3
- -m
- dynamo.vllm
args:
- python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B
- --model
- Qwen/Qwen3-0.6B
......@@ -30,7 +30,9 @@ spec:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
- python3
- -m
- dynamo.vllm
args:
- python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B
- --model
- Qwen/Qwen3-0.6B
......@@ -16,10 +16,12 @@ spec:
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
- python3
- -m
- dynamo.frontend
args:
- "python3 -m dynamo.frontend --http-port 8000"
- --http-port
- "8000"
decode:
dynamoNamespace: vllm-disagg
envFromSecret: hf-token-secret
......@@ -35,10 +37,14 @@ spec:
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
- python3
- -m
- dynamo.vllm
args:
- "python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --tensor-parallel-size 2"
- --model
- Qwen/Qwen3-0.6B
- --tensor-parallel-size
- "2"
prefill:
dynamoNamespace: vllm-disagg
envFromSecret: hf-token-secret
......@@ -54,7 +60,12 @@ spec:
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
- python3
- -m
- dynamo.vllm
args:
- "python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --is-prefill-worker --tensor-parallel-size 2"
- --model
- Qwen/Qwen3-0.6B
- --is-prefill-worker
- --tensor-parallel-size
- "2"
......@@ -27,10 +27,12 @@ spec:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
- python3
- -m
- dynamo.vllm
args:
- "python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B"
- --model
- Qwen/Qwen3-0.6B
VllmPrefillWorker:
dynamoNamespace: vllm-disagg
envFromSecret: hf-token-secret
......@@ -44,7 +46,10 @@ spec:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
- python3
- -m
- dynamo.vllm
args:
- "python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --is-prefill-worker"
- --model
- Qwen/Qwen3-0.6B
- --is-prefill-worker
......@@ -50,15 +50,14 @@ spec:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/planner/src/dynamo/planner
command:
- /bin/sh
- -c
- python3
- -m
- planner_sla
args:
- >-
python3 -m planner_sla
--environment=kubernetes
--backend=vllm
--adjustment-interval=60
--profile-results-dir=/data/profiling_results
- --environment=kubernetes
- --backend=vllm
- --adjustment-interval=60
- --profile-results-dir=/data/profiling_results
VllmDecodeWorker:
dynamoNamespace: vllm-disagg-planner
envFromSecret: hf-token-secret
......
......@@ -30,10 +30,12 @@ spec:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
- python3
- -m
- dynamo.vllm
args:
- python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B
- --model
- Qwen/Qwen3-0.6B
VllmPrefillWorker:
dynamoNamespace: vllm-v1-disagg-router
envFromSecret: hf-token-secret
......@@ -47,7 +49,10 @@ spec:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
- python3
- -m
- dynamo.vllm
args:
- python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --is-prefill-worker
- --model
- Qwen/Qwen3-0.6B
- --is-prefill-worker
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment