Unverified commit e77646ae, authored by Hongkuan Zhou; committed by GitHub
Browse files

fix: planner e2e test config file using new CLI format (#7014)


Signed-off-by: hongkuanz <hongkuanz@nvidia.com>
parent 2e29620d
......@@ -27,18 +27,13 @@ spec:
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/src/dynamo/planner
command:
- python3
- -m
- planner_sla
- dynamo.planner
args:
- --environment=kubernetes
- --backend=vllm
- --enable-loadbased-scaling
- --disable-throughput-scaling
- --loadbased-adjustment-interval=5
- --loadbased-min-observations=5
- --config
- '{"environment": "kubernetes", "backend": "vllm", "enable_load_scaling": true, "enable_throughput_scaling": false, "pre_deployment_sweeping_mode": "none", "load_adjustment_interval": 5, "load_min_observations": 5}'
VllmDecodeWorker:
envFromSecret: hf-token-secret
componentType: worker
......
......@@ -19,17 +19,13 @@ spec:
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/src/dynamo/planner
command:
- python3
- -m
- planner_sla
- dynamo.planner
args:
- --environment=kubernetes
- --backend=vllm
- --adjustment-interval=60
- --profile-results-dir=/workspace/tests/planner/profiling_results/H200_TP1P_TP1D
- --no-correction
- --config
- '{"environment": "kubernetes", "backend": "vllm", "throughput_adjustment_interval": 60, "profile_results_dir": "/workspace/tests/planner/profiling_results/H200_TP1P_TP1D", "no_correction": true}'
VllmDecodeWorker:
envFromSecret: hf-token-secret
componentType: worker
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment