Commit ab5a31b5 (unverified), authored by Alec, committed by GitHub
Browse files

test(planner): isolate planner-family suites [DYN-2534] (#7723)

parent cc22114d
...@@ -9,15 +9,18 @@ where needed. ...@@ -9,15 +9,18 @@ where needed.
""" """
import os import os
import sys
from pathlib import Path from pathlib import Path
from unittest.mock import patch from unittest.mock import patch
import pytest import pytest
import yaml import yaml
project_root = Path(__file__).parent.parent.parent pytestmark = [
sys.path.insert(0, str(project_root)) pytest.mark.pre_merge,
pytest.mark.gpu_0,
pytest.mark.unit,
pytest.mark.planner,
]
try: try:
from dynamo.planner.config.planner_config import ( from dynamo.planner.config.planner_config import (
......
...@@ -9,15 +9,17 @@ the end-to-end test suite. ...@@ -9,15 +9,17 @@ the end-to-end test suite.
""" """
import copy import copy
import sys
from pathlib import Path
from unittest.mock import patch from unittest.mock import patch
import pandas as pd import pandas as pd
import pytest import pytest
project_root = Path(__file__).parent.parent.parent pytestmark = [
sys.path.insert(0, str(project_root)) pytest.mark.pre_merge,
pytest.mark.gpu_0,
pytest.mark.unit,
pytest.mark.planner,
]
try: try:
from dynamo.profiler.rapid import _run_default_sim, _run_naive_fallback from dynamo.profiler.rapid import _run_default_sim, _run_naive_fallback
......
...@@ -8,15 +8,17 @@ require live K8s deployments and are covered by the mocked end-to-end tests ...@@ -8,15 +8,17 @@ require live K8s deployments and are covered by the mocked end-to-end tests
in test_profile_sla_dgdr.py. in test_profile_sla_dgdr.py.
""" """
import sys
from pathlib import Path
from unittest.mock import patch from unittest.mock import patch
import pandas as pd import pandas as pd
import pytest import pytest
project_root = Path(__file__).parent.parent.parent pytestmark = [
sys.path.insert(0, str(project_root)) pytest.mark.pre_merge,
pytest.mark.gpu_0,
pytest.mark.unit,
pytest.mark.planner,
]
try: try:
from dynamo.profiler.thorough import _pick_thorough_best_config from dynamo.profiler.thorough import _pick_thorough_best_config
......
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
import sys
from pathlib import Path
import pytest import pytest
project_root = Path(__file__).parent.parent.parent pytestmark = [
sys.path.insert(0, str(project_root)) pytest.mark.pre_merge,
sys.path.insert(0, str(project_root / "components" / "src")) pytest.mark.gpu_0,
pytest.mark.unit,
pytestmark = [pytest.mark.pre_merge, pytest.mark.gpu_0, pytest.mark.unit] pytest.mark.planner,
]
try: try:
from dynamo.profiler.utils.config import update_image from dynamo.profiler.utils.config import update_image
......
...@@ -11,6 +11,7 @@ pytestmark = [ ...@@ -11,6 +11,7 @@ pytestmark = [
pytest.mark.unit, pytest.mark.unit,
pytest.mark.gpu_0, pytest.mark.gpu_0,
pytest.mark.pre_merge, pytest.mark.pre_merge,
pytest.mark.planner,
pytest.mark.parallel, pytest.mark.parallel,
] ]
......
...@@ -24,6 +24,7 @@ pytestmark = [ ...@@ -24,6 +24,7 @@ pytestmark = [
pytest.mark.unit, pytest.mark.unit,
pytest.mark.gpu_0, pytest.mark.gpu_0,
pytest.mark.pre_merge, pytest.mark.pre_merge,
pytest.mark.planner,
pytest.mark.parallel, pytest.mark.parallel,
] ]
......
...@@ -41,7 +41,11 @@ RUN --mount=type=bind,from=target,target=/target \ ...@@ -41,7 +41,11 @@ RUN --mount=type=bind,from=target,target=/target \
python3 /helpers/python_helper.py --root /target > /output/python.tsv 2>/output/python_err.txt ; \ python3 /helpers/python_helper.py --root /target > /output/python.tsv 2>/output/python_err.txt ; \
cat /output/dpkg_err.txt >&2 ; \ cat /output/dpkg_err.txt >&2 ; \
cat /output/python_err.txt >&2 ; \ cat /output/python_err.txt >&2 ; \
if [ -f /target/var/lib/dpkg/status ]; then \
[ -s /output/dpkg.tsv ] || { echo "ERROR: dpkg extraction produced no output" >&2; exit 1; } ; \ [ -s /output/dpkg.tsv ] || { echo "ERROR: dpkg extraction produced no output" >&2; exit 1; } ; \
else \
echo "⚠️ WARNING: dpkg status file not present; skipping OS package extraction" >&2 ; \
fi ; \
[ -s /output/python.tsv ] || echo "⚠️ WARNING: python extraction produced no output" >&2 [ -s /output/python.tsv ] || echo "⚠️ WARNING: python extraction produced no output" >&2
FROM scratch FROM scratch
......
...@@ -120,6 +120,12 @@ def parse_dpkg_status(status_path): ...@@ -120,6 +120,12 @@ def parse_dpkg_status(status_path):
elif ":" in line: elif ":" in line:
key, _, val = line.partition(":") key, _, val = line.partition(":")
current[key.strip()] = val.strip() current[key.strip()] = val.strip()
except FileNotFoundError:
print(
f"WARNING: No dpkg status file found: {status_path}",
file=sys.stderr,
)
return {}
except (OSError, IOError): except (OSError, IOError):
print(f"ERROR: Cannot read dpkg status file: {status_path}", file=sys.stderr) print(f"ERROR: Cannot read dpkg status file: {status_path}", file=sys.stderr)
sys.exit(1) sys.exit(1)
......
...@@ -32,7 +32,7 @@ def main() -> None: ...@@ -32,7 +32,7 @@ def main() -> None:
parser.add_argument( parser.add_argument(
"--target", "--target",
default="runtime", default="runtime",
choices=["runtime", "frontend"], choices=["runtime", "frontend", "planner"],
help="Build target (default: runtime)", help="Build target (default: runtime)",
) )
parser.add_argument( parser.add_argument(
...@@ -74,6 +74,25 @@ def main() -> None: ...@@ -74,6 +74,25 @@ def main() -> None:
print(image) print(image)
return return
if args.target == "planner":
if args.framework != "dynamo":
print(
"ERROR: --target planner is only supported for --framework dynamo",
file=sys.stderr,
)
sys.exit(1)
planner_cfg = ctx.get("dynamo", {})
runtime_image = planner_cfg.get("planner_runtime_image")
runtime_image_tag = planner_cfg.get("planner_runtime_image_tag")
if not runtime_image or not runtime_image_tag:
print(
"ERROR: planner_runtime_image/planner_runtime_image_tag not found in context.yaml dynamo section",
file=sys.stderr,
)
sys.exit(1)
print(f"{runtime_image}:{runtime_image_tag}")
return
# Runtime target # Runtime target
if not args.cuda_version: if not args.cuda_version:
print("ERROR: --cuda-version is required for runtime targets", file=sys.stderr) print("ERROR: --cuda-version is required for runtime targets", file=sys.stderr)
......
...@@ -63,9 +63,8 @@ RUN --mount=type=bind,source=./container/deps/requirements.planner.txt,target=/t ...@@ -63,9 +63,8 @@ RUN --mount=type=bind,source=./container/deps/requirements.planner.txt,target=/t
/opt/dynamo/wheelhouse/ai_dynamo*any.whl /opt/dynamo/wheelhouse/ai_dynamo*any.whl
# Copy only the subset of the repository needed for planner/profiler service # Copy only the subset of the repository needed for planner/profiler service
# startup and targeted planner/profiler unit tests. # startup and the component-local planner-family test suites.
COPY --chmod=664 --chown=dynamo:0 pyproject.toml /workspace/pyproject.toml COPY --chmod=664 --chown=dynamo:0 pyproject.toml /workspace/pyproject.toml
COPY --chmod=775 --chown=dynamo:0 tests /workspace/tests
COPY --chmod=775 --chown=dynamo:0 components/src/dynamo/planner /workspace/components/src/dynamo/planner COPY --chmod=775 --chown=dynamo:0 components/src/dynamo/planner /workspace/components/src/dynamo/planner
COPY --chmod=775 --chown=dynamo:0 components/src/dynamo/profiler /workspace/components/src/dynamo/profiler COPY --chmod=775 --chown=dynamo:0 components/src/dynamo/profiler /workspace/components/src/dynamo/profiler
COPY --chmod=775 --chown=dynamo:0 components/src/dynamo/global_planner /workspace/components/src/dynamo/global_planner COPY --chmod=775 --chown=dynamo:0 components/src/dynamo/global_planner /workspace/components/src/dynamo/global_planner
...@@ -75,7 +74,7 @@ COPY --chmod=775 --chown=dynamo:0 examples /workspace/examples ...@@ -75,7 +74,7 @@ COPY --chmod=775 --chown=dynamo:0 examples /workspace/examples
FROM ${PLANNER_RUNTIME_IMAGE}:${PLANNER_RUNTIME_IMAGE_TAG} AS planner FROM ${PLANNER_RUNTIME_IMAGE}:${PLANNER_RUNTIME_IMAGE_TAG} AS planner
COPY --from=planner_builder /etc/group /etc/passwd /etc/ COPY --from=planner_builder /etc/group /etc/passwd /etc/
COPY --from=planner_builder /bin/dash /usr/bin/sh COPY --from=planner_builder /bin/dash /bin/sh
COPY --from=planner_builder /bin/uv /bin/uvx /usr/local/bin/ COPY --from=planner_builder /bin/uv /bin/uvx /usr/local/bin/
COPY --chown=1000:0 --from=planner_builder /home/dynamo /home/dynamo COPY --chown=1000:0 --from=planner_builder /home/dynamo /home/dynamo
COPY --chown=1000:0 --from=planner_builder /opt/dynamo/venv /opt/dynamo/venv COPY --chown=1000:0 --from=planner_builder /opt/dynamo/venv /opt/dynamo/venv
...@@ -90,7 +89,7 @@ ENV DYNAMO_COMMIT_SHA=${DYNAMO_COMMIT_SHA} \ ...@@ -90,7 +89,7 @@ ENV DYNAMO_COMMIT_SHA=${DYNAMO_COMMIT_SHA} \
VIRTUAL_ENV=/opt/dynamo/venv \ VIRTUAL_ENV=/opt/dynamo/venv \
LD_LIBRARY_PATH="/opt/dynamo/lib" \ LD_LIBRARY_PATH="/opt/dynamo/lib" \
PATH="/opt/dynamo/venv/bin:/usr/local/bin/etcd:/usr/local/bin:/bin" \ PATH="/opt/dynamo/venv/bin:/usr/local/bin/etcd:/usr/local/bin:/bin" \
PYTHONPATH="/workspace" PYTHONPATH="/workspace/components/src:/workspace"
WORKDIR /workspace WORKDIR /workspace
USER dynamo USER dynamo
......
...@@ -86,7 +86,7 @@ args: ...@@ -86,7 +86,7 @@ args:
- --loadbased-adjustment-interval=5 - --loadbased-adjustment-interval=5
``` ```
The planner will auto-discover the frontend metrics endpoint from the DGD. See [disagg_planner_load.yaml](https://github.com/ai-dynamo/dynamo/blob/main/tests/planner/scaling/disagg_planner_load.yaml) for a complete example. The planner will auto-discover the frontend metrics endpoint from the DGD. See [disagg_planner.yaml](https://github.com/ai-dynamo/dynamo/blob/main/examples/backends/vllm/deploy/disagg_planner.yaml) for a complete example.
### Manual DGD Deployment ### Manual DGD Deployment
......
...@@ -239,7 +239,7 @@ The planner inside each pool must be configured for `global-planner` mode so it ...@@ -239,7 +239,7 @@ The planner inside each pool must be configured for `global-planner` mode so it
"ttft": 2000, "ttft": 2000,
"prefill_engine_num_gpu": 2, "prefill_engine_num_gpu": 2,
"model_name": "${MODEL_NAME}", "model_name": "${MODEL_NAME}",
"profile_results_dir": "/workspace/tests/planner/profiling_results/H200_TP1P_TP1D" "profile_results_dir": "/workspace/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D"
} }
``` ```
......
...@@ -241,7 +241,7 @@ It also accepts older raw-data directories containing: ...@@ -241,7 +241,7 @@ It also accepts older raw-data directories containing:
```bash ```bash
python -m dynamo.mocker \ python -m dynamo.mocker \
--model-path nvidia/Llama-3.1-8B-Instruct-FP8 \ --model-path nvidia/Llama-3.1-8B-Instruct-FP8 \
--planner-profile-data tests/planner/profiling_results/H200_TP1P_TP1D \ --planner-profile-data components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D \
--speedup-ratio 1.0 --speedup-ratio 1.0
``` ```
......
...@@ -34,4 +34,4 @@ spec: ...@@ -34,4 +34,4 @@ spec:
- --speedup-ratio - --speedup-ratio
- "1.0" - "1.0"
- --planner-profile-data - --planner-profile-data
- /workspace/tests/planner/profiling_results/H200_TP1P_TP1D - /workspace/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D
\ No newline at end of file
...@@ -34,7 +34,7 @@ spec: ...@@ -34,7 +34,7 @@ spec:
- --speedup-ratio - --speedup-ratio
- "1.0" - "1.0"
- --planner-profile-data - --planner-profile-data
- /workspace/tests/planner/profiling_results/H200_TP1P_TP1D - /workspace/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D
- --disaggregation-mode - --disaggregation-mode
- prefill - prefill
decode: decode:
...@@ -58,6 +58,6 @@ spec: ...@@ -58,6 +58,6 @@ spec:
- --speedup-ratio - --speedup-ratio
- "1.0" - "1.0"
- --planner-profile-data - --planner-profile-data
- /workspace/tests/planner/profiling_results/H200_TP1P_TP1D - /workspace/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D
- --disaggregation-mode - --disaggregation-mode
- decode - decode
...@@ -168,7 +168,7 @@ spec: ...@@ -168,7 +168,7 @@ spec:
- dynamo.planner - dynamo.planner
args: args:
- --config - --config
- '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"vllm","mode":"disagg","enable_load_scaling":false,"enable_throughput_scaling":true,"throughput_metrics_source":"router","ttft":2000,"itl":200,"max_gpu_budget":-1,"prefill_engine_num_gpu":1,"decode_engine_num_gpu":1,"model_name":"${MODEL_A}","profile_results_dir":"/workspace/tests/planner/profiling_results/H200_TP1P_TP1D"}' - '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"vllm","mode":"disagg","enable_load_scaling":false,"enable_throughput_scaling":true,"throughput_metrics_source":"router","ttft":2000,"itl":200,"max_gpu_budget":-1,"prefill_engine_num_gpu":1,"decode_engine_num_gpu":1,"model_name":"${MODEL_A}","profile_results_dir":"/workspace/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D"}'
--- ---
# ── Model B: self-contained disagg serving DGD ────────────────────────────── # ── Model B: self-contained disagg serving DGD ──────────────────────────────
apiVersion: nvidia.com/v1alpha1 apiVersion: nvidia.com/v1alpha1
...@@ -263,4 +263,4 @@ spec: ...@@ -263,4 +263,4 @@ spec:
- dynamo.planner - dynamo.planner
args: args:
- --config - --config
- '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"vllm","mode":"disagg","enable_load_scaling":false,"enable_throughput_scaling":true,"throughput_metrics_source":"router","ttft":2000,"itl":200,"max_gpu_budget":-1,"prefill_engine_num_gpu":1,"decode_engine_num_gpu":1,"model_name":"${MODEL_B}","profile_results_dir":"/workspace/tests/planner/profiling_results/H200_TP1P_TP1D"}' - '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"vllm","mode":"disagg","enable_load_scaling":false,"enable_throughput_scaling":true,"throughput_metrics_source":"router","ttft":2000,"itl":200,"max_gpu_budget":-1,"prefill_engine_num_gpu":1,"decode_engine_num_gpu":1,"model_name":"${MODEL_B}","profile_results_dir":"/workspace/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D"}'
...@@ -164,7 +164,7 @@ spec: ...@@ -164,7 +164,7 @@ spec:
- --speedup-ratio - --speedup-ratio
- "5.0" - "5.0"
- --planner-profile-data - --planner-profile-data
- /workspace/tests/planner/profiling_results/H200_TP1P_TP1D - /workspace/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D
- --is-prefill-worker - --is-prefill-worker
Planner: Planner:
...@@ -181,7 +181,7 @@ spec: ...@@ -181,7 +181,7 @@ spec:
- dynamo.planner - dynamo.planner
args: args:
- --config - --config
- '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"mocker","mode":"prefill","throughput_metrics_source":"router","throughput_adjustment_interval":30,"ttft":2000,"max_gpu_budget":-1,"prefill_engine_num_gpu":1,"no_correction":true,"profile_results_dir":"/workspace/tests/planner/profiling_results/H200_TP1P_TP1D","model_name":"nvidia/Llama-3.1-8B-Instruct-FP8"}' - '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"mocker","mode":"prefill","throughput_metrics_source":"router","throughput_adjustment_interval":30,"ttft":2000,"max_gpu_budget":-1,"prefill_engine_num_gpu":1,"no_correction":true,"profile_results_dir":"/workspace/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D","model_name":"nvidia/Llama-3.1-8B-Instruct-FP8"}'
--- ---
apiVersion: nvidia.com/v1alpha1 apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeployment kind: DynamoGraphDeployment
...@@ -232,7 +232,7 @@ spec: ...@@ -232,7 +232,7 @@ spec:
- --speedup-ratio - --speedup-ratio
- "5.0" - "5.0"
- --planner-profile-data - --planner-profile-data
- /workspace/tests/planner/profiling_results/H200_TP1P_TP1D - /workspace/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D
- --is-prefill-worker - --is-prefill-worker
Planner: Planner:
...@@ -249,7 +249,7 @@ spec: ...@@ -249,7 +249,7 @@ spec:
- dynamo.planner - dynamo.planner
args: args:
- --config - --config
- '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"mocker","mode":"prefill","throughput_metrics_source":"router","throughput_adjustment_interval":30,"ttft":2000,"max_gpu_budget":-1,"prefill_engine_num_gpu":1,"no_correction":true,"profile_results_dir":"/workspace/tests/planner/profiling_results/H200_TP1P_TP1D","model_name":"nvidia/Llama-3.1-8B-Instruct-FP8"}' - '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"mocker","mode":"prefill","throughput_metrics_source":"router","throughput_adjustment_interval":30,"ttft":2000,"max_gpu_budget":-1,"prefill_engine_num_gpu":1,"no_correction":true,"profile_results_dir":"/workspace/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D","model_name":"nvidia/Llama-3.1-8B-Instruct-FP8"}'
--- ---
apiVersion: nvidia.com/v1alpha1 apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeployment kind: DynamoGraphDeployment
...@@ -301,7 +301,7 @@ spec: ...@@ -301,7 +301,7 @@ spec:
- --speedup-ratio - --speedup-ratio
- "5.0" - "5.0"
- --planner-profile-data - --planner-profile-data
- /workspace/tests/planner/profiling_results/H200_TP1P_TP1D - /workspace/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D
Planner: Planner:
componentType: planner componentType: planner
...@@ -317,7 +317,7 @@ spec: ...@@ -317,7 +317,7 @@ spec:
- dynamo.planner - dynamo.planner
args: args:
- --config - --config
- '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"mocker","mode":"decode","throughput_metrics_source":"router","throughput_adjustment_interval":30,"itl":200,"max_gpu_budget":-1,"decode_engine_num_gpu":1,"no_correction":true,"profile_results_dir":"/workspace/tests/planner/profiling_results/H200_TP1P_TP1D","model_name":"nvidia/Llama-3.1-8B-Instruct-FP8"}' - '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"mocker","mode":"decode","throughput_metrics_source":"router","throughput_adjustment_interval":30,"itl":200,"max_gpu_budget":-1,"decode_engine_num_gpu":1,"no_correction":true,"profile_results_dir":"/workspace/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D","model_name":"nvidia/Llama-3.1-8B-Instruct-FP8"}'
--- ---
apiVersion: nvidia.com/v1alpha1 apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeployment kind: DynamoGraphDeployment
...@@ -369,7 +369,7 @@ spec: ...@@ -369,7 +369,7 @@ spec:
- --speedup-ratio - --speedup-ratio
- "5.0" - "5.0"
- --planner-profile-data - --planner-profile-data
- /workspace/tests/planner/profiling_results/H200_TP1P_TP1D - /workspace/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D
Planner: Planner:
componentType: planner componentType: planner
...@@ -385,4 +385,4 @@ spec: ...@@ -385,4 +385,4 @@ spec:
- dynamo.planner - dynamo.planner
args: args:
- --config - --config
- '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"mocker","mode":"decode","throughput_metrics_source":"router","throughput_adjustment_interval":30,"itl":200,"max_gpu_budget":-1,"decode_engine_num_gpu":1,"no_correction":true,"profile_results_dir":"/workspace/tests/planner/profiling_results/H200_TP1P_TP1D","model_name":"nvidia/Llama-3.1-8B-Instruct-FP8"}' - '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"mocker","mode":"decode","throughput_metrics_source":"router","throughput_adjustment_interval":30,"itl":200,"max_gpu_budget":-1,"decode_engine_num_gpu":1,"no_correction":true,"profile_results_dir":"/workspace/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D","model_name":"nvidia/Llama-3.1-8B-Instruct-FP8"}'
...@@ -233,7 +233,7 @@ spec: ...@@ -233,7 +233,7 @@ spec:
- dynamo.planner - dynamo.planner
args: args:
- --config - --config
- '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"vllm","mode":"prefill","enable_load_scaling":false,"enable_throughput_scaling":true,"throughput_metrics_source":"router","ttft":2000,"max_gpu_budget":-1,"prefill_engine_num_gpu":1,"model_name":"${MODEL_NAME}","profile_results_dir":"/workspace/tests/planner/profiling_results/H200_TP1P_TP1D"}' - '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"vllm","mode":"prefill","enable_load_scaling":false,"enable_throughput_scaling":true,"throughput_metrics_source":"router","ttft":2000,"max_gpu_budget":-1,"prefill_engine_num_gpu":1,"model_name":"${MODEL_NAME}","profile_results_dir":"/workspace/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D"}'
--- ---
apiVersion: nvidia.com/v1alpha1 apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeployment kind: DynamoGraphDeployment
...@@ -308,7 +308,7 @@ spec: ...@@ -308,7 +308,7 @@ spec:
- dynamo.planner - dynamo.planner
args: args:
- --config - --config
- '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"vllm","mode":"prefill","enable_load_scaling":false,"enable_throughput_scaling":true,"throughput_metrics_source":"router","ttft":2000,"max_gpu_budget":-1,"prefill_engine_num_gpu":2,"model_name":"${MODEL_NAME}","profile_results_dir":"/workspace/tests/planner/profiling_results/H200_TP1P_TP1D"}' - '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"vllm","mode":"prefill","enable_load_scaling":false,"enable_throughput_scaling":true,"throughput_metrics_source":"router","ttft":2000,"max_gpu_budget":-1,"prefill_engine_num_gpu":2,"model_name":"${MODEL_NAME}","profile_results_dir":"/workspace/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D"}'
--- ---
apiVersion: nvidia.com/v1alpha1 apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeployment kind: DynamoGraphDeployment
...@@ -383,4 +383,4 @@ spec: ...@@ -383,4 +383,4 @@ spec:
- dynamo.planner - dynamo.planner
args: args:
- --config - --config
- '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"vllm","mode":"decode","enable_load_scaling":false,"enable_throughput_scaling":true,"throughput_metrics_source":"router","itl":200,"max_gpu_budget":-1,"decode_engine_num_gpu":1,"model_name":"${MODEL_NAME}","profile_results_dir":"/workspace/tests/planner/profiling_results/H200_TP1P_TP1D"}' - '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"vllm","mode":"decode","enable_load_scaling":false,"enable_throughput_scaling":true,"throughput_metrics_source":"router","itl":200,"max_gpu_budget":-1,"decode_engine_num_gpu":1,"model_name":"${MODEL_NAME}","profile_results_dir":"/workspace/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D"}'
...@@ -353,7 +353,7 @@ def _run_aic_static_point(backend_name: str, isl: int, osl: int, batch_size: int ...@@ -353,7 +353,7 @@ def _run_aic_static_point(backend_name: str, isl: int, osl: int, batch_size: int
def _planner_profile_data_dir_path() -> Path: def _planner_profile_data_dir_path() -> Path:
return ( return (
Path(__file__).resolve().parents[5] Path(__file__).resolve().parents[5]
/ "tests/planner/profiling_results/H200_TP1P_TP1D" / "components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D"
) )
......
...@@ -314,6 +314,11 @@ ignore_missing_imports = true ...@@ -314,6 +314,11 @@ ignore_missing_imports = true
module = ["dynamo.*.tests.*", "dynamo.*.tests"] module = ["dynamo.*.tests.*", "dynamo.*.tests"]
ignore_errors = true ignore_errors = true
[[tool.mypy.overrides]]
# Manual planner helpers are operational scripts, not typed library surfaces.
module = ["dynamo.planner.manual.*"]
ignore_errors = true
[[tool.mypy.overrides]] [[tool.mypy.overrides]]
# Skip mypy analysis on backend framework internals. # Skip mypy analysis on backend framework internals.
# ignore_missing_imports silences import-not-found only when the backend # ignore_missing_imports silences import-not-found only when the backend
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment