feat: allow users to only plot certain subdirs (#3190)

Signed-off-by: Hannah Zhang <hannahz@nvidia.com>

feat: allow users to only plot certain subdirs (#3190)
Signed-off-by: Hannah Zhang <hannahz@nvidia.com>
4800d0be · hhzhang16 · GitHub · 52c75363 · 4800d0be · 4800d0be
Unverified Commit 4800d0be authored Sep 24, 2025 by hhzhang16 Committed by GitHub Sep 24, 2025
Showing with 69 additions and 7 deletions

benchmarks/README.md benchmarks/README.md +3 -0

benchmarks/utils/plot.py benchmarks/utils/plot.py +41 -7

docs/benchmarks/benchmarking.md docs/benchmarks/benchmarking.md +25 -0

No files found.
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -33,6 +33,9 @@ python3 -m benchmarks.utils.benchmark \
 # Generate plots
 python3 -m benchmarks.utils.plot --data-dir ./benchmarks/results
+# Or plot only specific benchmark experiments
+python3 -m benchmarks.utils.plot --data-dir ./benchmarks/results --benchmark-name my-benchmark
 ```
 ## Features

--- a/benchmarks/utils/plot.py
+++ b/benchmarks/utils/plot.py
@@ -4,7 +4,7 @@
 import json
 import re
 from pathlib import Path
-from typing import Dict, List, Tuple
+from typing import Dict, List, Optional, Tuple
 import matplotlib.pyplot as plt
@@ -260,26 +260,40 @@ def create_efficiency_plot(
    print(f"Saved efficiency plot: {output_path}")
-def generate_plots(base_output_dir: Path, output_dir: Path) -> None:
+def generate_plots(
+    base_output_dir: Path, output_dir: Path, benchmark_names: Optional[List[str]] = None
+) -> None:
    """
    Generate performance plots from benchmark results.
    Args:
        base_output_dir: Base directory containing benchmark results
        output_dir: Directory to save plots
+        benchmark_names: Optional list of specific benchmark names to plot. If None, plots all subdirectories.
    """
    print(f"Generating plots from results in {base_output_dir}")
+    if not base_output_dir.exists():
+        print(f"Results directory does not exist: {base_output_dir}")
+        return
    # Create plots directory
-    output_dir.mkdir(exist_ok=True)
+    output_dir.mkdir(parents=True, exist_ok=True)
    # Parse results for each deployment type
    deployment_results = {}
    # Find all subdirectories that contain benchmark results
+    names_set = set(benchmark_names) if benchmark_names is not None else None
    for item in base_output_dir.iterdir():
        if item.is_dir() and item.name != "plots":
            deployment_type = item.name
+            # If benchmark_names is specified, only process those directories
+            if names_set is not None and deployment_type not in names_set:
+                print(f"Skipping {deployment_type} (not in specified benchmark names)")
+                continue
            results = parse_benchmark_results(item)
            if results:
                deployment_results[deployment_type] = results
@@ -288,8 +302,21 @@ def generate_plots(base_output_dir: Path, output_dir: Path) -> None:
                print(f"No valid results found for {deployment_type}")
    if not deployment_results:
-        print("No benchmark results found to plot!")
+        if benchmark_names:
-        return
+            available = sorted(
+                [
+                    p.name
+                    for p in base_output_dir.iterdir()
+                    if p.is_dir() and p.name != "plots"
+                ]
+            )
+            missing = sorted([n for n in benchmark_names if n not in available])
+            print(f"No benchmark results found for specified names: {benchmark_names}")
+            if missing:
+                print(f"Missing (not found under {base_output_dir}): {missing}")
+            print(f"Available experiments: {available}")
+        else:
+            print("No benchmark results found to plot!")
    # 1. P50 Inter-token Latency vs Concurrency
    p50_data = []
@@ -416,15 +443,22 @@ if __name__ == "__main__":
    parser.add_argument(
        "--output-dir", help="Output directory for plots (defaults to data-dir/plots)"
    )
+    parser.add_argument(
+        "--benchmark-name",
+        action="append",
+        help="Specific benchmark experiment name to plot (can be specified multiple times). If not specified, plots all subdirectories.",
+    )
    args = parser.parse_args()
    data_dir = Path(args.data_dir)
+    benchmark_names = args.benchmark_name if args.benchmark_name else None
    if args.output_dir:
        # If output dir specified, use it as base and call generate_plots
        output_dir = Path(args.output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)
-        generate_plots(data_dir, output_dir)
+        generate_plots(data_dir, output_dir, benchmark_names)
    else:
        # Use data_dir as base output dir
-        generate_plots(data_dir, data_dir / "plots")
+        generate_plots(data_dir, data_dir / "plots", benchmark_names)
--- a/docs/benchmarks/benchmarking.md
+++ b/docs/benchmarks/benchmarking.md
@@ -134,6 +134,9 @@ python3 -m benchmarks.utils.benchmark \
 ```bash
 # Generate plots and summary using Python plotting script
 python3 -m benchmarks.utils.plot --data-dir ./benchmarks/results
+# Or plot only specific benchmark experiments
+python3 -m benchmarks.utils.plot --data-dir ./benchmarks/results --benchmark-name experiment-a --benchmark-name experiment-b
 ```
 ## Use Cases
@@ -191,6 +194,28 @@ The Python plotting module:
 1. **Generates** comparison plots using your custom labels in `<OUTPUT_DIR>/plots/`
 2. **Creates** summary statistics and visualizations
+### Plotting Options
+The plotting script supports options for choosing which experiments to visualize and where the resulting plots are written:
+```bash
+# Plot all benchmark experiments in the data directory
+python3 -m benchmarks.utils.plot --data-dir ./benchmarks/results
+# Plot only specific benchmark experiments
+python3 -m benchmarks.utils.plot --data-dir ./benchmarks/results --benchmark-name experiment-a --benchmark-name experiment-b
+# Specify custom output directory for plots
+python3 -m benchmarks.utils.plot --data-dir ./benchmarks/results --output-dir ./custom-plots
+```
+**Available Options:**
+- `--data-dir`: Directory containing benchmark results (required)
+- `--benchmark-name`: Specific benchmark experiment name to plot (can be specified multiple times). Each name must exactly match a subdirectory name under the directory given by `--data-dir`.
+- `--output-dir`: Custom output directory for plots (defaults to `<data-dir>/plots`)
+**Note**: If `--benchmark-name` is not specified, the script plots every subdirectory found in the data directory, except the `plots` subdirectory itself.
 ### Using Your Own Models and Configuration
 The benchmarking framework supports any HuggingFace-compatible LLM model. Specify your model in the benchmark script's `--model` parameter. It must match the model name of the deployment. You can override the default sequence lengths (2000/256 tokens) with `--isl` and `--osl` flags if needed for your specific workload.