#!/usr/bin/env python3
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""
Frontend performance sweep runner.

7
8
9
10
11
12
Thin CLI entry point that delegates to sweep_core (pure logic), sweep_executors
(how runs execute), and sweep_k8s (k8s helpers).

Supports two execution modes:
  - local: delegates each run to run_perf.sh (mocker + frontend per run)
  - k8s: DGD-based execution with aiperf against a k8s-deployed frontend
13
14
15
16
17
18
19
20

Sweep dimensions (all configurable):
  - tokenizers (hf, fastokens)
  - concurrency levels
  - ISL values
  - worker counts

Usage:
21
    # Local smoke test (2 runs)
22
    python3 sweep_runner.py --tokenizers hf,fastokens --concurrency 32 --isl 512 \\
23
        --benchmark-duration 30 --speedup-ratio 1000000
24

25
    # Full local sweep with mocker
26
27
    python3 sweep_runner.py --tokenizers hf,fastokens --concurrency 32,64 --isl 512,1024,2048

28
29
30
31
32
33
34
    # K8s sweep with DGD
    python3 sweep_runner.py --mode k8s --dgd-name dynamo-bench-mocker \\
        --tokenizers hf,fastokens --concurrency 50,100 --isl 512

    # K8s with custom deploy template
    python3 sweep_runner.py --mode k8s --deploy-template dgd/templates/vllm.yaml \\
        --tokenizers hf --concurrency 128 --isl 1024
35

36
    # Transport saturation sweep
37
    python3 sweep_runner.py --tokenizers hf --concurrency 4096 \\
38
        --num-requests 16384,32768 --workers 1,2,4,8 --speedup-ratio 1000000
39
40
41

    # Dry run
    python3 sweep_runner.py --dry-run --tokenizers hf,fastokens --concurrency 32,64 --isl 512,1024
42
43
44

    # Emit plan as JSON (for Argo or MCP)
    python3 sweep_runner.py --emit-plan --tokenizers hf --concurrency 50 --isl 512
45
46
47
48
49
"""

import sys
from pathlib import Path

# Ensure the scripts directory is on the path for package imports.
# The imports that follow resolve sibling packages by name, so the directory
# containing this script must be importable regardless of the current working
# directory the script is launched from.
SCRIPT_DIR = Path(__file__).resolve().parent
if str(SCRIPT_DIR) not in sys.path:
    # Insert at the front so the sibling packages take precedence.
    sys.path.insert(0, str(SCRIPT_DIR))

# Imported after the sys.path adjustment above, hence the E402 suppressions.
from sweep_core.config import build_argument_parser, config_from_args  # noqa: E402
from sweep_core.orchestrator import run as run_sweep  # noqa: E402
from sweep_core.planner import build_plan, print_plan  # noqa: E402


def main() -> None:
    """Parse CLI arguments, build the sweep plan, and execute it.

    Flow:
      1. Build the shared argument parser and add the CLI-only
         ``--emit-plan`` flag.
      2. Convert the parsed args into a config, build the plan from it, and
         print the plan via ``print_plan``.
      3. With ``--emit-plan``: print ``plan.to_json()`` and return.
      4. With ``config.dry_run``: list every run id in the plan and return.
      5. Otherwise pick the executor matching ``config.mode`` and hand the
         plan to ``run_sweep``.

    Exits the process with status 1 (message on stderr) when ``config.mode``
    is neither ``"local"`` nor ``"k8s"``.
    """
    parser = build_argument_parser()

    # Add CLI-only flags that don't belong in SweepConfig
    parser.add_argument(
        "--emit-plan",
        action="store_true",
        help="Print the sweep plan as JSON and exit (no execution)",
    )

    args = parser.parse_args()

    # Build typed config from args
    config = config_from_args(args)

    # Build plan
    plan = build_plan(config)
    # NOTE(review): print_plan runs before the --emit-plan JSON is printed.
    # If print_plan writes to stdout, JSON consumers will see its output
    # ahead of the JSON document -- confirm which stream it uses.
    print_plan(plan)

    # Emit plan JSON mode
    if args.emit_plan:
        print(plan.to_json())
        return

    # Dry run mode: list the planned runs without executing anything
    if config.dry_run:
        for i, run_spec in enumerate(plan.runs, 1):
            print(f"  [{i}/{plan.total_runs}] {run_spec.run_id}")
        return

    # Select executor based on mode. Executor modules are imported lazily so
    # only the selected mode's module is loaded.
    if config.mode == "local":
        from sweep_executors.local import LocalExecutor

        executor = LocalExecutor()
    elif config.mode == "k8s":
        from sweep_executors.k8s_dgd import K8sDgdExecutor

        executor = K8sDgdExecutor()
    else:
        print(
            f"ERROR: Unknown mode '{config.mode}'. Use 'local' or 'k8s'.",
            file=sys.stderr,
        )
        sys.exit(1)

    # Run the sweep
    run_sweep(plan, executor)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()