Unverified commit ab5a31b5, authored by Alec, committed by GitHub
Browse files

test(planner): isolate planner-family suites [DYN-2534] (#7723)

parent cc22114d
......@@ -9,15 +9,18 @@ where needed.
"""
import os
import sys
from pathlib import Path
from unittest.mock import patch
import pytest
import yaml
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))
pytestmark = [
pytest.mark.pre_merge,
pytest.mark.gpu_0,
pytest.mark.unit,
pytest.mark.planner,
]
try:
from dynamo.planner.config.planner_config import (
......
......@@ -9,15 +9,17 @@ the end-to-end test suite.
"""
import copy
import sys
from pathlib import Path
from unittest.mock import patch
import pandas as pd
import pytest
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))
pytestmark = [
pytest.mark.pre_merge,
pytest.mark.gpu_0,
pytest.mark.unit,
pytest.mark.planner,
]
try:
from dynamo.profiler.rapid import _run_default_sim, _run_naive_fallback
......
......@@ -8,15 +8,17 @@ require live K8s deployments and are covered by the mocked end-to-end tests
in test_profile_sla_dgdr.py.
"""
import sys
from pathlib import Path
from unittest.mock import patch
import pandas as pd
import pytest
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))
pytestmark = [
pytest.mark.pre_merge,
pytest.mark.gpu_0,
pytest.mark.unit,
pytest.mark.planner,
]
try:
from dynamo.profiler.thorough import _pick_thorough_best_config
......
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
import sys
from pathlib import Path
import pytest
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))
sys.path.insert(0, str(project_root / "components" / "src"))
pytestmark = [pytest.mark.pre_merge, pytest.mark.gpu_0, pytest.mark.unit]
pytestmark = [
pytest.mark.pre_merge,
pytest.mark.gpu_0,
pytest.mark.unit,
pytest.mark.planner,
]
try:
from dynamo.profiler.utils.config import update_image
......
......@@ -11,6 +11,7 @@ pytestmark = [
pytest.mark.unit,
pytest.mark.gpu_0,
pytest.mark.pre_merge,
pytest.mark.planner,
pytest.mark.parallel,
]
......
......@@ -24,6 +24,7 @@ pytestmark = [
pytest.mark.unit,
pytest.mark.gpu_0,
pytest.mark.pre_merge,
pytest.mark.planner,
pytest.mark.parallel,
]
......
......@@ -41,7 +41,11 @@ RUN --mount=type=bind,from=target,target=/target \
python3 /helpers/python_helper.py --root /target > /output/python.tsv 2>/output/python_err.txt ; \
cat /output/dpkg_err.txt >&2 ; \
cat /output/python_err.txt >&2 ; \
if [ -f /target/var/lib/dpkg/status ]; then \
[ -s /output/dpkg.tsv ] || { echo "ERROR: dpkg extraction produced no output" >&2; exit 1; } ; \
else \
echo "⚠️ WARNING: dpkg status file not present; skipping OS package extraction" >&2 ; \
fi ; \
[ -s /output/python.tsv ] || echo "⚠️ WARNING: python extraction produced no output" >&2
FROM scratch
......
......@@ -120,6 +120,12 @@ def parse_dpkg_status(status_path):
elif ":" in line:
key, _, val = line.partition(":")
current[key.strip()] = val.strip()
except FileNotFoundError:
print(
f"WARNING: No dpkg status file found: {status_path}",
file=sys.stderr,
)
return {}
except (OSError, IOError):
print(f"ERROR: Cannot read dpkg status file: {status_path}", file=sys.stderr)
sys.exit(1)
......
......@@ -32,7 +32,7 @@ def main() -> None:
parser.add_argument(
"--target",
default="runtime",
choices=["runtime", "frontend"],
choices=["runtime", "frontend", "planner"],
help="Build target (default: runtime)",
)
parser.add_argument(
......@@ -74,6 +74,25 @@ def main() -> None:
print(image)
return
if args.target == "planner":
if args.framework != "dynamo":
print(
"ERROR: --target planner is only supported for --framework dynamo",
file=sys.stderr,
)
sys.exit(1)
planner_cfg = ctx.get("dynamo", {})
runtime_image = planner_cfg.get("planner_runtime_image")
runtime_image_tag = planner_cfg.get("planner_runtime_image_tag")
if not runtime_image or not runtime_image_tag:
print(
"ERROR: planner_runtime_image/planner_runtime_image_tag not found in context.yaml dynamo section",
file=sys.stderr,
)
sys.exit(1)
print(f"{runtime_image}:{runtime_image_tag}")
return
# Runtime target
if not args.cuda_version:
print("ERROR: --cuda-version is required for runtime targets", file=sys.stderr)
......
......@@ -63,9 +63,8 @@ RUN --mount=type=bind,source=./container/deps/requirements.planner.txt,target=/t
/opt/dynamo/wheelhouse/ai_dynamo*any.whl
# Copy only the subset of the repository needed for planner/profiler service
# startup and targeted planner/profiler unit tests.
# startup and the component-local planner-family test suites.
COPY --chmod=664 --chown=dynamo:0 pyproject.toml /workspace/pyproject.toml
COPY --chmod=775 --chown=dynamo:0 tests /workspace/tests
COPY --chmod=775 --chown=dynamo:0 components/src/dynamo/planner /workspace/components/src/dynamo/planner
COPY --chmod=775 --chown=dynamo:0 components/src/dynamo/profiler /workspace/components/src/dynamo/profiler
COPY --chmod=775 --chown=dynamo:0 components/src/dynamo/global_planner /workspace/components/src/dynamo/global_planner
......@@ -75,7 +74,7 @@ COPY --chmod=775 --chown=dynamo:0 examples /workspace/examples
FROM ${PLANNER_RUNTIME_IMAGE}:${PLANNER_RUNTIME_IMAGE_TAG} AS planner
COPY --from=planner_builder /etc/group /etc/passwd /etc/
COPY --from=planner_builder /bin/dash /usr/bin/sh
COPY --from=planner_builder /bin/dash /bin/sh
COPY --from=planner_builder /bin/uv /bin/uvx /usr/local/bin/
COPY --chown=1000:0 --from=planner_builder /home/dynamo /home/dynamo
COPY --chown=1000:0 --from=planner_builder /opt/dynamo/venv /opt/dynamo/venv
......@@ -90,7 +89,7 @@ ENV DYNAMO_COMMIT_SHA=${DYNAMO_COMMIT_SHA} \
VIRTUAL_ENV=/opt/dynamo/venv \
LD_LIBRARY_PATH="/opt/dynamo/lib" \
PATH="/opt/dynamo/venv/bin:/usr/local/bin/etcd:/usr/local/bin:/bin" \
PYTHONPATH="/workspace"
PYTHONPATH="/workspace/components/src:/workspace"
WORKDIR /workspace
USER dynamo
......
......@@ -86,7 +86,7 @@ args:
- --loadbased-adjustment-interval=5
```
The planner will auto-discover the frontend metrics endpoint from the DGD. See [disagg_planner_load.yaml](https://github.com/ai-dynamo/dynamo/blob/main/tests/planner/scaling/disagg_planner_load.yaml) for a complete example.
The planner will auto-discover the frontend metrics endpoint from the DGD. See [disagg_planner.yaml](https://github.com/ai-dynamo/dynamo/blob/main/examples/backends/vllm/deploy/disagg_planner.yaml) for a complete example.
### Manual DGD Deployment
......
......@@ -239,7 +239,7 @@ The planner inside each pool must be configured for `global-planner` mode so it
"ttft": 2000,
"prefill_engine_num_gpu": 2,
"model_name": "${MODEL_NAME}",
"profile_results_dir": "/workspace/tests/planner/profiling_results/H200_TP1P_TP1D"
"profile_results_dir": "/workspace/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D"
}
```
......
......@@ -241,7 +241,7 @@ It also accepts older raw-data directories containing:
```bash
python -m dynamo.mocker \
--model-path nvidia/Llama-3.1-8B-Instruct-FP8 \
--planner-profile-data tests/planner/profiling_results/H200_TP1P_TP1D \
--planner-profile-data components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D \
--speedup-ratio 1.0
```
......
......@@ -34,4 +34,4 @@ spec:
- --speedup-ratio
- "1.0"
- --planner-profile-data
- /workspace/tests/planner/profiling_results/H200_TP1P_TP1D
\ No newline at end of file
- /workspace/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D
......@@ -34,7 +34,7 @@ spec:
- --speedup-ratio
- "1.0"
- --planner-profile-data
- /workspace/tests/planner/profiling_results/H200_TP1P_TP1D
- /workspace/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D
- --disaggregation-mode
- prefill
decode:
......@@ -58,6 +58,6 @@ spec:
- --speedup-ratio
- "1.0"
- --planner-profile-data
- /workspace/tests/planner/profiling_results/H200_TP1P_TP1D
- /workspace/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D
- --disaggregation-mode
- decode
......@@ -168,7 +168,7 @@ spec:
- dynamo.planner
args:
- --config
- '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"vllm","mode":"disagg","enable_load_scaling":false,"enable_throughput_scaling":true,"throughput_metrics_source":"router","ttft":2000,"itl":200,"max_gpu_budget":-1,"prefill_engine_num_gpu":1,"decode_engine_num_gpu":1,"model_name":"${MODEL_A}","profile_results_dir":"/workspace/tests/planner/profiling_results/H200_TP1P_TP1D"}'
- '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"vllm","mode":"disagg","enable_load_scaling":false,"enable_throughput_scaling":true,"throughput_metrics_source":"router","ttft":2000,"itl":200,"max_gpu_budget":-1,"prefill_engine_num_gpu":1,"decode_engine_num_gpu":1,"model_name":"${MODEL_A}","profile_results_dir":"/workspace/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D"}'
---
# ── Model B: self-contained disagg serving DGD ──────────────────────────────
apiVersion: nvidia.com/v1alpha1
......@@ -263,4 +263,4 @@ spec:
- dynamo.planner
args:
- --config
- '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"vllm","mode":"disagg","enable_load_scaling":false,"enable_throughput_scaling":true,"throughput_metrics_source":"router","ttft":2000,"itl":200,"max_gpu_budget":-1,"prefill_engine_num_gpu":1,"decode_engine_num_gpu":1,"model_name":"${MODEL_B}","profile_results_dir":"/workspace/tests/planner/profiling_results/H200_TP1P_TP1D"}'
- '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"vllm","mode":"disagg","enable_load_scaling":false,"enable_throughput_scaling":true,"throughput_metrics_source":"router","ttft":2000,"itl":200,"max_gpu_budget":-1,"prefill_engine_num_gpu":1,"decode_engine_num_gpu":1,"model_name":"${MODEL_B}","profile_results_dir":"/workspace/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D"}'
......@@ -164,7 +164,7 @@ spec:
- --speedup-ratio
- "5.0"
- --planner-profile-data
- /workspace/tests/planner/profiling_results/H200_TP1P_TP1D
- /workspace/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D
- --is-prefill-worker
Planner:
......@@ -181,7 +181,7 @@ spec:
- dynamo.planner
args:
- --config
- '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"mocker","mode":"prefill","throughput_metrics_source":"router","throughput_adjustment_interval":30,"ttft":2000,"max_gpu_budget":-1,"prefill_engine_num_gpu":1,"no_correction":true,"profile_results_dir":"/workspace/tests/planner/profiling_results/H200_TP1P_TP1D","model_name":"nvidia/Llama-3.1-8B-Instruct-FP8"}'
- '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"mocker","mode":"prefill","throughput_metrics_source":"router","throughput_adjustment_interval":30,"ttft":2000,"max_gpu_budget":-1,"prefill_engine_num_gpu":1,"no_correction":true,"profile_results_dir":"/workspace/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D","model_name":"nvidia/Llama-3.1-8B-Instruct-FP8"}'
---
apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeployment
......@@ -232,7 +232,7 @@ spec:
- --speedup-ratio
- "5.0"
- --planner-profile-data
- /workspace/tests/planner/profiling_results/H200_TP1P_TP1D
- /workspace/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D
- --is-prefill-worker
Planner:
......@@ -249,7 +249,7 @@ spec:
- dynamo.planner
args:
- --config
- '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"mocker","mode":"prefill","throughput_metrics_source":"router","throughput_adjustment_interval":30,"ttft":2000,"max_gpu_budget":-1,"prefill_engine_num_gpu":1,"no_correction":true,"profile_results_dir":"/workspace/tests/planner/profiling_results/H200_TP1P_TP1D","model_name":"nvidia/Llama-3.1-8B-Instruct-FP8"}'
- '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"mocker","mode":"prefill","throughput_metrics_source":"router","throughput_adjustment_interval":30,"ttft":2000,"max_gpu_budget":-1,"prefill_engine_num_gpu":1,"no_correction":true,"profile_results_dir":"/workspace/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D","model_name":"nvidia/Llama-3.1-8B-Instruct-FP8"}'
---
apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeployment
......@@ -301,7 +301,7 @@ spec:
- --speedup-ratio
- "5.0"
- --planner-profile-data
- /workspace/tests/planner/profiling_results/H200_TP1P_TP1D
- /workspace/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D
Planner:
componentType: planner
......@@ -317,7 +317,7 @@ spec:
- dynamo.planner
args:
- --config
- '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"mocker","mode":"decode","throughput_metrics_source":"router","throughput_adjustment_interval":30,"itl":200,"max_gpu_budget":-1,"decode_engine_num_gpu":1,"no_correction":true,"profile_results_dir":"/workspace/tests/planner/profiling_results/H200_TP1P_TP1D","model_name":"nvidia/Llama-3.1-8B-Instruct-FP8"}'
- '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"mocker","mode":"decode","throughput_metrics_source":"router","throughput_adjustment_interval":30,"itl":200,"max_gpu_budget":-1,"decode_engine_num_gpu":1,"no_correction":true,"profile_results_dir":"/workspace/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D","model_name":"nvidia/Llama-3.1-8B-Instruct-FP8"}'
---
apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeployment
......@@ -369,7 +369,7 @@ spec:
- --speedup-ratio
- "5.0"
- --planner-profile-data
- /workspace/tests/planner/profiling_results/H200_TP1P_TP1D
- /workspace/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D
Planner:
componentType: planner
......@@ -385,4 +385,4 @@ spec:
- dynamo.planner
args:
- --config
- '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"mocker","mode":"decode","throughput_metrics_source":"router","throughput_adjustment_interval":30,"itl":200,"max_gpu_budget":-1,"decode_engine_num_gpu":1,"no_correction":true,"profile_results_dir":"/workspace/tests/planner/profiling_results/H200_TP1P_TP1D","model_name":"nvidia/Llama-3.1-8B-Instruct-FP8"}'
- '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"mocker","mode":"decode","throughput_metrics_source":"router","throughput_adjustment_interval":30,"itl":200,"max_gpu_budget":-1,"decode_engine_num_gpu":1,"no_correction":true,"profile_results_dir":"/workspace/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D","model_name":"nvidia/Llama-3.1-8B-Instruct-FP8"}'
......@@ -233,7 +233,7 @@ spec:
- dynamo.planner
args:
- --config
- '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"vllm","mode":"prefill","enable_load_scaling":false,"enable_throughput_scaling":true,"throughput_metrics_source":"router","ttft":2000,"max_gpu_budget":-1,"prefill_engine_num_gpu":1,"model_name":"${MODEL_NAME}","profile_results_dir":"/workspace/tests/planner/profiling_results/H200_TP1P_TP1D"}'
- '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"vllm","mode":"prefill","enable_load_scaling":false,"enable_throughput_scaling":true,"throughput_metrics_source":"router","ttft":2000,"max_gpu_budget":-1,"prefill_engine_num_gpu":1,"model_name":"${MODEL_NAME}","profile_results_dir":"/workspace/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D"}'
---
apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeployment
......@@ -308,7 +308,7 @@ spec:
- dynamo.planner
args:
- --config
- '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"vllm","mode":"prefill","enable_load_scaling":false,"enable_throughput_scaling":true,"throughput_metrics_source":"router","ttft":2000,"max_gpu_budget":-1,"prefill_engine_num_gpu":2,"model_name":"${MODEL_NAME}","profile_results_dir":"/workspace/tests/planner/profiling_results/H200_TP1P_TP1D"}'
- '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"vllm","mode":"prefill","enable_load_scaling":false,"enable_throughput_scaling":true,"throughput_metrics_source":"router","ttft":2000,"max_gpu_budget":-1,"prefill_engine_num_gpu":2,"model_name":"${MODEL_NAME}","profile_results_dir":"/workspace/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D"}'
---
apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeployment
......@@ -383,4 +383,4 @@ spec:
- dynamo.planner
args:
- --config
- '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"vllm","mode":"decode","enable_load_scaling":false,"enable_throughput_scaling":true,"throughput_metrics_source":"router","itl":200,"max_gpu_budget":-1,"decode_engine_num_gpu":1,"model_name":"${MODEL_NAME}","profile_results_dir":"/workspace/tests/planner/profiling_results/H200_TP1P_TP1D"}'
- '{"environment":"global-planner","global_planner_namespace":"${K8S_NAMESPACE}-gp-ctrl","backend":"vllm","mode":"decode","enable_load_scaling":false,"enable_throughput_scaling":true,"throughput_metrics_source":"router","itl":200,"max_gpu_budget":-1,"decode_engine_num_gpu":1,"model_name":"${MODEL_NAME}","profile_results_dir":"/workspace/components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D"}'
......@@ -353,7 +353,7 @@ def _run_aic_static_point(backend_name: str, isl: int, osl: int, batch_size: int
def _planner_profile_data_dir_path() -> Path:
    """Return the path of the H200_TP1P_TP1D planner profiling-results directory.

    The path is anchored at ``parents[5]`` of this file's resolved location
    (presumably the repository root -- confirm against the file's position in
    the tree) and points at the component-local planner test data.

    Returns:
        The profiling-results directory as a ``Path``. The function does not
        check that the directory exists.

    Raises:
        IndexError: if this file's resolved path has fewer than six ancestors.
    """
    # Only the component-local location is used. Appending it after the legacy
    # "tests/planner/profiling_results/..." segment would yield a nested path
    # that does not exist.
    return (
        Path(__file__).resolve().parents[5]
        / "components/src/dynamo/planner/tests/data/profiling_results/H200_TP1P_TP1D"
    )
......
......@@ -314,6 +314,11 @@ ignore_missing_imports = true
module = ["dynamo.*.tests.*", "dynamo.*.tests"]
ignore_errors = true
[[tool.mypy.overrides]]
# Manual planner helpers are operational scripts, not typed library surfaces.
module = ["dynamo.planner.manual.*"]
ignore_errors = true
[[tool.mypy.overrides]]
# Skip mypy analysis on backend framework internals.
# ignore_missing_imports silences import-not-found only when the backend
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment