[Benchmark] Improve UX of sweep scripts (#35600)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>

[Benchmark] Improve UX of sweep scripts (#35600)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
4292e3b8 · Cyrus Leung · GitHub · 24d6ea8a · 4292e3b8 · 4292e3b8
Unverified Commit 4292e3b8 authored Feb 28, 2026 by Cyrus Leung Committed by GitHub Feb 28, 2026
6 changed files
--- a/docs/benchmarking/sweeps.md
+++ b/docs/benchmarking/sweeps.md
@@ -72,7 +72,7 @@ Follow these steps to run the script:
    ]
    ```
-5. Determine where you want to save the results, and pass that to `--output-dir`.
+5. Set `--output-dir` and optionally `--experiment-name` to control where to save the results.
 Example command:
@@ -82,7 +82,8 @@ vllm bench sweep serve \
    --bench-cmd 'vllm bench serve --model meta-llama/Llama-2-7b-chat-hf --backend vllm --endpoint /v1/completions --dataset-name sharegpt --dataset-path benchmarks/ShareGPT_V3_unfiltered_cleaned_split.json' \
    --serve-params benchmarks/serve_hparams.json \
    --bench-params benchmarks/bench_hparams.json \
-    -o benchmarks/results
+    --output-dir benchmarks/results \
+    --experiment-name demo
 ```
 By default, each parameter combination is benchmarked 3 times to make the results more reliable. You can adjust the number of runs by setting `--num-runs`.
@@ -118,7 +119,8 @@ vllm bench sweep serve_workload \
    --serve-params benchmarks/serve_hparams.json \
    --bench-params benchmarks/bench_hparams.json \
    --num-runs 1 \
-    -o benchmarks/results
+    --output-dir benchmarks/results \
+    --experiment-name demo
 ```
 The algorithm for exploring different workload levels can be summarized as follows:
@@ -186,7 +188,8 @@ vllm bench sweep startup \
    --startup-cmd 'vllm bench startup --model Qwen/Qwen3-0.6B' \
    --serve-params benchmarks/serve_hparams.json \
    --startup-params benchmarks/startup_hparams.json \
-    -o benchmarks/results
+    --output-dir benchmarks/results \
+    --experiment-name demo
 ```
 !!! important
@@ -204,11 +207,10 @@ Control the variables to plot via `--var-x` and `--var-y`, optionally applying `
 Example commands for visualizing [Workload Explorer](#workload-explorer) results:
 ```bash
-# Name of the directory that stores the results
+EXPERIMENT_DIR=${1:-"benchmarks/results/demo"}
-TIMESTAMP=$1
 # Latency increases as the workload increases
-vllm bench sweep plot benchmarks/results/$TIMESTAMP \
+vllm bench sweep plot $EXPERIMENT_DIR \
    --var-x max_concurrency \
    --var-y median_ttft_ms \
    --col-by _benchmark_name \
@@ -216,7 +218,7 @@ vllm bench sweep plot benchmarks/results/$TIMESTAMP \
    --fig-name latency_curve
 # Throughput saturates as workload increases
-vllm bench sweep plot benchmarks/results/$TIMESTAMP \
+vllm bench sweep plot $EXPERIMENT_DIR \
    --var-x max_concurrency \
    --var-y total_token_throughput \
    --col-by _benchmark_name \
@@ -224,7 +226,7 @@ vllm bench sweep plot benchmarks/results/$TIMESTAMP \
    --fig-name throughput_curve
 # Tradeoff between latency and throughput
-vllm bench sweep plot benchmarks/results/$TIMESTAMP \
+vllm bench sweep plot $EXPERIMENT_DIR \
    --var-x total_token_throughput \
    --var-y median_ttft_ms \
    --col-by _benchmark_name \
@@ -249,7 +251,9 @@ Higher concurrency or batch size can raise GPU efficiency (per-GPU), but can add
 Example:
 ```bash
-vllm bench sweep plot_pareto benchmarks/results/<timestamp> \
+EXPERIMENT_DIR=${1:-"benchmarks/results/demo"}
+vllm bench sweep plot_pareto $EXPERIMENT_DIR \
  --label-by max_concurrency,tensor_parallel_size,pipeline_parallel_size
 ```

--- a/vllm/benchmarks/sweep/plot.py
+++ b/vllm/benchmarks/sweep/plot.py
@@ -499,7 +499,7 @@ class SweepPlotArgs:
    @classmethod
    def from_cli_args(cls, args: argparse.Namespace):
-        output_dir = Path(args.OUTPUT_DIR)
+        output_dir = Path(args.EXPERIMENT_DIR)
        if not output_dir.exists():
            raise ValueError(f"No parameter sweep results under {output_dir}")
@@ -531,11 +531,9 @@ class SweepPlotArgs:
    @classmethod
    def add_cli_args(cls, parser: argparse.ArgumentParser) -> argparse.ArgumentParser:
        parser.add_argument(
-            "OUTPUT_DIR",
+            "EXPERIMENT_DIR",
            type=str,
-            default="results",
+            help="The directory containing the sweep results to plot.",
-            help="The directory containing the results to plot, "
-            "i.e., the `--output-dir` argument to the parameter sweep script.",
        )
        parser.add_argument(
            "--fig-dir",

--- a/vllm/benchmarks/sweep/plot_pareto.py
+++ b/vllm/benchmarks/sweep/plot_pareto.py
@@ -325,7 +325,7 @@ class SweepPlotParetoArgs:
    @classmethod
    def from_cli_args(cls, args: argparse.Namespace):
-        output_dir = Path(args.OUTPUT_DIR)
+        output_dir = Path(args.EXPERIMENT_DIR)
        if not output_dir.exists():
            raise ValueError(f"No parameter sweep results under {output_dir}")
@@ -342,9 +342,8 @@ class SweepPlotParetoArgs:
    @classmethod
    def add_cli_args(cls, parser: argparse.ArgumentParser):
        parser.add_argument(
-            "OUTPUT_DIR",
+            "EXPERIMENT_DIR",
            type=str,
-            default="results",
            help="The directory containing the sweep results to plot.",
        )
        parser.add_argument(

--- a/vllm/benchmarks/sweep/serve.py
+++ b/vllm/benchmarks/sweep/serve.py
@@ -4,6 +4,7 @@ import argparse
 import contextlib
 import json
 import shlex
+from contextlib import contextmanager
 from dataclasses import dataclass
 from datetime import datetime
 from pathlib import Path
@@ -135,7 +136,7 @@ def run_benchmark(
 def _get_comb_base_path(
-    output_dir: Path,
+    experiment_dir: Path,
    serve_comb: ParameterSweepItem,
    bench_comb: ParameterSweepItem,
    *,
@@ -149,7 +150,7 @@ def _get_comb_base_path(
    if extra_parts:
        parts.extend(extra_parts)
-    return output_dir / sanitize_filename("-".join(parts))
+    return experiment_dir / sanitize_filename("-".join(parts))
 def _get_comb_run_path(base_path: Path, run_number: int | None):
@@ -162,10 +163,10 @@ def _get_comb_run_path(base_path: Path, run_number: int | None):
 def _comb_needs_server(
    serve_comb: ParameterSweepItem,
    bench_combs: ParameterSweep,
-    output_dir: Path,
+    experiment_dir: Path,
 ):
    for bench_comb in bench_combs:
-        base_path = _get_comb_base_path(output_dir, serve_comb, bench_comb)
+        base_path = _get_comb_base_path(experiment_dir, serve_comb, bench_comb)
        if not _get_comb_run_path(base_path, run_number=None).exists():
            return True
@@ -179,11 +180,11 @@ def server_ctx(
    show_stdout: bool,
    serve_comb: ParameterSweepItem,
    bench_params: ParameterSweep,
-    output_dir: Path,
+    experiment_dir: Path,
    dry_run: bool,
    server_ready_timeout: int = 300,
 ):
-    if not _comb_needs_server(serve_comb, bench_params, output_dir):
+    if not _comb_needs_server(serve_comb, bench_params, experiment_dir):
        return contextlib.nullcontext()
    return run_server(
@@ -215,10 +216,10 @@ def run_comb(
    *,
    serve_comb: ParameterSweepItem,
    bench_comb: ParameterSweepItem,
+    link_vars: list[tuple[str, str]],
    base_path: Path,
    num_runs: int,
    dry_run: bool,
-    link_vars: list[tuple[str, str]],
 ):
    if not _comb_is_valid(serve_comb, bench_comb, link_vars):
        return None
@@ -257,10 +258,10 @@ def run_combs(
    server_ready_timeout: int,
    serve_params: ParameterSweep,
    bench_params: ParameterSweep,
-    output_dir: Path,
+    link_vars: list[tuple[str, str]],
+    experiment_dir: Path,
    num_runs: int,
    dry_run: bool,
-    link_vars: list[tuple[str, str]],
 ):
    all_data = list[dict[str, object]]()
    for serve_comb in serve_params:
@@ -270,22 +271,22 @@ def run_combs(
            show_stdout=show_stdout,
            serve_comb=serve_comb,
            bench_params=bench_params,
-            output_dir=output_dir,
+            experiment_dir=experiment_dir,
            dry_run=dry_run,
            server_ready_timeout=server_ready_timeout,
        ) as server:
            for bench_comb in bench_params:
-                base_path = _get_comb_base_path(output_dir, serve_comb, bench_comb)
+                base_path = _get_comb_base_path(experiment_dir, serve_comb, bench_comb)
                comb_data = run_comb(
                    server,
                    bench_cmd,
                    serve_comb=serve_comb,
                    bench_comb=bench_comb,
+                    link_vars=link_vars,
                    base_path=base_path,
                    num_runs=num_runs,
                    dry_run=dry_run,
-                    link_vars=link_vars,
                )
                if comb_data is not None:
@@ -295,7 +296,7 @@ def run_combs(
        return None
    combined_df = pd.DataFrame.from_records(all_data)
-    combined_df.to_csv(output_dir / "summary.csv")
+    combined_df.to_csv(experiment_dir / "summary.csv")
    return combined_df
@@ -309,11 +310,12 @@ class SweepServeArgs:
    server_ready_timeout: int
    serve_params: ParameterSweep
    bench_params: ParameterSweep
+    link_vars: list[tuple[str, str]]
    output_dir: Path
+    experiment_name: str
    num_runs: int
    dry_run: bool
-    resume: str | None
+    resume: bool
-    link_vars: list[tuple[str, str]]
    parser_name: ClassVar[str] = "serve"
    parser_help: ClassVar[str] = "Run vLLM server benchmark under multiple settings."
@@ -340,6 +342,11 @@ class SweepServeArgs:
        link_vars = cls.parse_link_vars(args.link_vars)
+        if args.experiment_name:
+            experiment_name = args.experiment_name
+        else:
+            experiment_name = datetime.now().strftime("%Y%m%d_%H%M%S")
        num_runs = args.num_runs
        if num_runs < 1:
            raise ValueError("`num_runs` should be at least 1.")
@@ -351,11 +358,12 @@ class SweepServeArgs:
            show_stdout=args.show_stdout,
            serve_params=serve_params,
            bench_params=bench_params,
+            link_vars=link_vars,
            output_dir=Path(args.output_dir),
+            experiment_name=experiment_name,
            num_runs=num_runs,
            dry_run=args.dry_run,
            resume=args.resume,
-            link_vars=link_vars,
            server_ready_timeout=args.server_ready_timeout,
        )
@@ -392,6 +400,7 @@ class SweepServeArgs:
            default=300,
            help="Timeout in seconds to wait for the server to become ready.",
        )
        parser.add_argument(
            "--serve-params",
            type=str,
@@ -402,6 +411,16 @@ class SweepServeArgs:
            "If both `serve_params` and `bench_params` are given, "
            "this script will iterate over their Cartesian product.",
        )
+        parser.add_argument(
+            "--link-vars",
+            type=str,
+            default="",
+            help=(
+                "Comma-separated list of linked variables between serve and bench, "
+                "e.g. max_num_seqs=max_concurrency,max_model_len=random_input_len"
+            ),
+        )
        parser.add_argument(
            "--bench-params",
            type=str,
@@ -417,7 +436,15 @@ class SweepServeArgs:
            "--output-dir",
            type=str,
            default="results",
-            help="The directory to which results are written.",
+            help="The main directory to which results are written.",
+        )
+        parser.add_argument(
+            "-e",
+            "--experiment-name",
+            type=str,
+            default=None,
+            help="The name of this experiment (defaults to current timestamp). "
+            "Results will be stored under `output_dir/experiment_name`.",
        )
        parser.add_argument(
            "--num-runs",
@@ -433,21 +460,10 @@ class SweepServeArgs:
        )
        parser.add_argument(
            "--resume",
-            type=str,
+            action="store_true",
-            default=None,
+            help="Resume a previous execution of this script, i.e., only run "
-            help="Set this to the name of a directory under `output_dir` (which is a "
+            "parameter combinations for which there are still no output files "
-            "timestamp) to resume a previous execution of this script, i.e., only run "
+            "under `output_dir/experiment_name`.",
-            "parameter combinations for which there are still no output files.",
-        )
-        parser.add_argument(
-            "--link-vars",
-            type=str,
-            default="",
-            help=(
-                "Comma-separated list of linked variables between serve and bench, "
-                "e.g. max_num_seqs=max_concurrency,max_model_len=random_input_len"
-            ),
        )
        return parser
@@ -462,33 +478,52 @@ class SweepServeArgs:
            pairs.append((a.strip(), b.strip()))
        return pairs
+    def resolve_experiment_dir(self) -> Path:
+        experiment_dir = self.output_dir / self.experiment_name
-def run_main(args: SweepServeArgs):
+        if self.resume:
-    timestamp = args.resume or datetime.now().strftime("%Y%m%d_%H%M%S")
+            if not experiment_dir.exists():
-    output_dir = args.output_dir / timestamp
+                raise ValueError(f"Cannot resume from non-existent {experiment_dir=}")
+        else:
+            if experiment_dir.exists():
+                raise ValueError(f"Cannot overwrite existing {experiment_dir=}")
-    if args.resume and not output_dir.exists():
+        return experiment_dir
-        raise ValueError(f"Cannot resume from non-existent directory ({output_dir})")
+    @contextmanager
+    def run_ctx(self, experiment_dir: Path):
+        if self.dry_run:
+            yield
+            print(f"Experiment will be saved at: {experiment_dir}")
+            return
        try:
+            yield
+            print(f"Experiment has been saved at: {experiment_dir}")
+        except BaseException as exc:
+            raise RuntimeError(
+                "The script was terminated early. Use `--resume` "
+                "to continue the script from its last checkpoint."
+            ) from exc
+def run_main(args: SweepServeArgs):
+    experiment_dir = args.resolve_experiment_dir()
+    with args.run_ctx(experiment_dir):
        return run_combs(
            serve_cmd=args.serve_cmd,
            bench_cmd=args.bench_cmd,
+            link_vars=args.link_vars,
            after_bench_cmd=args.after_bench_cmd,
            show_stdout=args.show_stdout,
            server_ready_timeout=args.server_ready_timeout,
            serve_params=args.serve_params,
            bench_params=args.bench_params,
-            output_dir=output_dir,
+            experiment_dir=experiment_dir,
            num_runs=args.num_runs,
            dry_run=args.dry_run,
-            link_vars=args.link_vars,
        )
-    except BaseException as exc:
-        raise RuntimeError(
-            f"The script was terminated early. Use `--resume {timestamp}` "
-            f"to continue the script from its last checkpoint."
-        ) from exc
 def main(args: argparse.Namespace):

--- a/vllm/benchmarks/sweep/serve_workload.py
+++ b/vllm/benchmarks/sweep/serve_workload.py
@@ -3,7 +3,6 @@
 import argparse
 import math
 from dataclasses import asdict, dataclass
-from datetime import datetime
 from pathlib import Path
 from typing import ClassVar, Literal, get_args
@@ -59,10 +58,10 @@ def run_comb_workload(
    *,
    serve_comb: ParameterSweepItem,
    bench_comb: ParameterSweepItem,
-    output_dir: Path,
+    link_vars: list[tuple[str, str]],
+    experiment_dir: Path,
    num_runs: int,
    dry_run: bool,
-    link_vars: list[tuple[str, str]],
    workload_var: WorkloadVariable,
    workload_value: int,
 ) -> list[dict[str, object]] | None:
@@ -73,15 +72,15 @@ def run_comb_workload(
        bench_cmd,
        serve_comb=serve_comb,
        bench_comb=bench_comb_workload,
+        link_vars=link_vars,
        base_path=_get_comb_base_path(
-            output_dir,
+            experiment_dir,
            serve_comb,
            bench_comb,
            extra_parts=("WL-", f"{workload_var}={workload_value}"),
        ),
        num_runs=num_runs,
        dry_run=dry_run,
-        link_vars=link_vars,
    )
@@ -91,12 +90,12 @@ def explore_comb_workloads(
    *,
    serve_comb: ParameterSweepItem,
    bench_comb: ParameterSweepItem,
+    link_vars: list[tuple[str, str]],
    workload_var: WorkloadVariable,
    workload_iters: int,
-    output_dir: Path,
+    experiment_dir: Path,
    num_runs: int,
    dry_run: bool,
-    link_vars: list[tuple[str, str]],
 ):
    print("[WL START]")
    print(f"Serve parameters: {serve_comb.as_text() or '(None)'}")
@@ -125,10 +124,10 @@ def explore_comb_workloads(
        bench_cmd,
        serve_comb=serve_comb,
        bench_comb=bench_comb | {"max_concurrency": 1},
-        output_dir=output_dir,
+        link_vars=link_vars,
+        experiment_dir=experiment_dir,
        num_runs=num_runs,
        dry_run=dry_run,
-        link_vars=link_vars,
        workload_var=workload_var,
        workload_value=1,
    )
@@ -137,10 +136,10 @@ def explore_comb_workloads(
        bench_cmd,
        serve_comb=serve_comb,
        bench_comb=bench_comb | {"max_concurrency": dataset_size},
-        output_dir=output_dir,
+        link_vars=link_vars,
+        experiment_dir=experiment_dir,
        num_runs=num_runs,
        dry_run=dry_run,
-        link_vars=link_vars,
        workload_var=workload_var,
        workload_value=dataset_size,
    )
@@ -177,10 +176,10 @@ def explore_comb_workloads(
            bench_cmd,
            serve_comb=serve_comb,
            bench_comb=bench_comb,
-            output_dir=output_dir,
+            link_vars=link_vars,
+            experiment_dir=experiment_dir,
            num_runs=num_runs,
            dry_run=dry_run,
-            link_vars=link_vars,
            workload_var=workload_var,
            workload_value=inter_workload_value,
        )
@@ -201,12 +200,12 @@ def explore_combs_workloads(
    server_ready_timeout: int,
    serve_params: ParameterSweep,
    bench_params: ParameterSweep,
+    link_vars: list[tuple[str, str]],
    workload_var: WorkloadVariable,
    workload_iters: int,
-    output_dir: Path,
+    experiment_dir: Path,
    num_runs: int,
    dry_run: bool,
-    link_vars: list[tuple[str, str]],
 ):
    if any(bench_comb.has_param(workload_var) for bench_comb in bench_params):
        raise ValueError(
@@ -223,7 +222,7 @@ def explore_combs_workloads(
            server_ready_timeout=server_ready_timeout,
            serve_comb=serve_comb,
            bench_params=bench_params,
-            output_dir=output_dir,
+            experiment_dir=experiment_dir,
            dry_run=dry_run,
        ) as server:
            for bench_comb in bench_params:
@@ -232,12 +231,12 @@ def explore_combs_workloads(
                    bench_cmd,
                    serve_comb=serve_comb,
                    bench_comb=bench_comb,
+                    link_vars=link_vars,
                    workload_var=workload_var,
                    workload_iters=workload_iters,
-                    output_dir=output_dir,
+                    experiment_dir=experiment_dir,
                    num_runs=num_runs,
                    dry_run=dry_run,
-                    link_vars=link_vars,
                )
                if comb_data is not None:
@@ -247,7 +246,7 @@ def explore_combs_workloads(
        return None
    combined_df = pd.DataFrame.from_records(all_data)
-    combined_df.to_csv(output_dir / "summary.csv")
+    combined_df.to_csv(experiment_dir / "summary.csv")
    return combined_df
@@ -298,13 +297,9 @@ class SweepServeWorkloadArgs(SweepServeArgs):
 def run_main(args: SweepServeWorkloadArgs):
-    timestamp = args.resume or datetime.now().strftime("%Y%m%d_%H%M%S")
+    experiment_dir = args.resolve_experiment_dir()
-    output_dir = args.output_dir / timestamp
-    if args.resume and not output_dir.exists():
-        raise ValueError(f"Cannot resume from non-existent directory ({output_dir})")
-    try:
+    with args.run_ctx(experiment_dir):
        return explore_combs_workloads(
            serve_cmd=args.serve_cmd,
            bench_cmd=args.bench_cmd,
@@ -313,18 +308,13 @@ def run_main(args: SweepServeWorkloadArgs):
            server_ready_timeout=args.server_ready_timeout,
            serve_params=args.serve_params,
            bench_params=args.bench_params,
+            link_vars=args.link_vars,
            workload_var=args.workload_var,
            workload_iters=args.workload_iters,
-            output_dir=output_dir,
+            experiment_dir=experiment_dir,
            num_runs=args.num_runs,
            dry_run=args.dry_run,
-            link_vars=args.link_vars,
        )
-    except BaseException as exc:
-        raise RuntimeError(
-            f"The script was terminated early. Use `--resume {timestamp}` "
-            f"to continue the script from its last checkpoint."
-        ) from exc
 def main(args: argparse.Namespace):

--- a/vllm/benchmarks/sweep/startup.py
+++ b/vllm/benchmarks/sweep/startup.py
@@ -4,6 +4,7 @@ import argparse
 import json
 import shlex
 import subprocess
+from contextlib import contextmanager
 from dataclasses import dataclass
 from datetime import datetime
 from functools import lru_cache
@@ -111,7 +112,7 @@ def _apply_output_json(cmd: list[str], output_path: Path) -> list[str]:
 def _get_comb_base_path(
-    output_dir: Path,
+    experiment_dir: Path,
    serve_comb: ParameterSweepItem,
    startup_comb: ParameterSweepItem,
 ) -> Path:
@@ -120,7 +121,8 @@ def _get_comb_base_path(
        parts.extend(("SERVE-", serve_comb.name))
    if startup_comb:
        parts.extend(("STARTUP-", startup_comb.name))
-    return output_dir / sanitize_filename("-".join(parts))
+    return experiment_dir / sanitize_filename("-".join(parts))
 def _get_comb_run_path(base_path: Path, run_number: int | None) -> Path:
@@ -225,7 +227,7 @@ def run_combs(
    *,
    serve_params: ParameterSweep,
    startup_params: ParameterSweep,
-    output_dir: Path,
+    experiment_dir: Path,
    num_runs: int,
    show_stdout: bool,
    dry_run: bool,
@@ -233,7 +235,7 @@ def run_combs(
    all_data = list[dict[str, object]]()
    for serve_comb in serve_params:
        for startup_comb in startup_params:
-            base_path = _get_comb_base_path(output_dir, serve_comb, startup_comb)
+            base_path = _get_comb_base_path(experiment_dir, serve_comb, startup_comb)
            comb_data = run_comb(
                startup_cmd,
                serve_comb=serve_comb,
@@ -250,7 +252,7 @@ def run_combs(
        return None
    combined_df = pd.DataFrame.from_records(all_data)
-    combined_df.to_csv(output_dir / "summary.csv")
+    combined_df.to_csv(experiment_dir / "summary.csv")
    return combined_df
@@ -260,11 +262,11 @@ class SweepStartupArgs:
    serve_params: ParameterSweep
    startup_params: ParameterSweep
    output_dir: Path
+    experiment_name: str
    num_runs: int
    show_stdout: bool
    dry_run: bool
-    resume: str | None
+    resume: bool
-    strict_params: bool
    parser_name: ClassVar[str] = "startup"
    parser_help: ClassVar[str] = (
@@ -286,13 +288,19 @@ class SweepStartupArgs:
            startup_params = ParameterSweep.from_records([{}])
        supported = _get_supported_startup_keys()
+        strict_params = args.strict_params
        serve_params = _filter_params(
-            serve_params, supported=supported, strict=args.strict_params
+            serve_params, supported=supported, strict=strict_params
        )
        startup_params = _filter_params(
-            startup_params, supported=supported, strict=args.strict_params
+            startup_params, supported=supported, strict=strict_params
        )
+        if args.experiment_name:
+            experiment_name = args.experiment_name
+        else:
+            experiment_name = datetime.now().strftime("%Y%m%d_%H%M%S")
        if args.num_runs < 1:
            raise ValueError("`num_runs` should be at least 1.")
@@ -301,11 +309,11 @@ class SweepStartupArgs:
            serve_params=serve_params,
            startup_params=startup_params,
            output_dir=Path(args.output_dir),
+            experiment_name=experiment_name,
            num_runs=args.num_runs,
            show_stdout=args.show_stdout,
            dry_run=args.dry_run,
            resume=args.resume,
-            strict_params=args.strict_params,
        )
    @classmethod
@@ -316,6 +324,7 @@ class SweepStartupArgs:
            default="vllm bench startup",
            help="The command used to run the startup benchmark.",
        )
        parser.add_argument(
            "--serve-params",
            type=str,
@@ -331,12 +340,27 @@ class SweepStartupArgs:
            help="Path to JSON file containing parameter combinations "
            "for the `vllm bench startup` command.",
        )
+        parser.add_argument(
+            "--strict-params",
+            action="store_true",
+            help="If set, unknown parameters in sweep files raise an error "
+            "instead of being ignored.",
+        )
        parser.add_argument(
            "-o",
            "--output-dir",
            type=str,
            default="results",
-            help="The directory to which results are written.",
+            help="The main directory to which results are written.",
+        )
+        parser.add_argument(
+            "-e",
+            "--experiment-name",
+            type=str,
+            default=None,
+            help="The name of this experiment (defaults to current timestamp). "
+            "Results will be stored under `output_dir/experiment_name`.",
        )
        parser.add_argument(
            "--num-runs",
@@ -357,43 +381,56 @@ class SweepStartupArgs:
        )
        parser.add_argument(
            "--resume",
-            type=str,
-            default=None,
-            help="Set this to the name of a directory under `output_dir` (which is a "
-            "timestamp) to resume a previous execution of this script, i.e., only run "
-            "parameter combinations for which there are still no output files.",
-        )
-        parser.add_argument(
-            "--strict-params",
            action="store_true",
-            help="If set, unknown parameters in sweep files raise an error "
+            help="Resume a previous execution of this script, i.e., only run "
-            "instead of being ignored.",
+            "parameter combinations for which there are still no output files "
+            "under `output_dir/experiment_name`.",
        )
        return parser
+    def resolve_experiment_dir(self) -> Path:
+        experiment_dir = self.output_dir / self.experiment_name
-def run_main(args: SweepStartupArgs):
+        if self.resume:
-    timestamp = args.resume or datetime.now().strftime("%Y%m%d_%H%M%S")
+            if not experiment_dir.exists():
-    output_dir = args.output_dir / timestamp
+                raise ValueError(f"Cannot resume from non-existent {experiment_dir=}")
+        else:
+            if experiment_dir.exists():
+                raise ValueError(f"Cannot overwrite existing {experiment_dir=}")
-    if args.resume and not output_dir.exists():
+        return experiment_dir
-        raise ValueError(f"Cannot resume from non-existent directory ({output_dir})")
+    @contextmanager
+    def run_ctx(self, experiment_dir: Path):
+        if self.dry_run:
+            yield
+            print(f"Experiment will be saved at: {experiment_dir}")
+            return
        try:
+            yield
+            print(f"Experiment has been saved at: {experiment_dir}")
+        except BaseException as exc:
+            raise RuntimeError(
+                "The script was terminated early. Use `--resume` "
+                "to continue the script from its last checkpoint."
+            ) from exc
+def run_main(args: SweepStartupArgs):
+    experiment_dir = args.resolve_experiment_dir()
+    with args.run_ctx(experiment_dir):
        return run_combs(
            startup_cmd=args.startup_cmd,
            serve_params=args.serve_params,
            startup_params=args.startup_params,
-            output_dir=output_dir,
+            experiment_dir=experiment_dir,
            num_runs=args.num_runs,
            show_stdout=args.show_stdout,
            dry_run=args.dry_run,
        )
-    except BaseException as exc:
-        raise RuntimeError(
-            f"The script was terminated early. Use `--resume {timestamp}` "
-            f"to continue the script from its last checkpoint."
-        ) from exc
 def main(args: argparse.Namespace):