Unverified Commit 4800d0be authored by hhzhang16, committed by GitHub
Browse files

feat: allow users to only plot certain subdirs (#3190)


Signed-off-by: Hannah Zhang <hannahz@nvidia.com>
parent 52c75363
......@@ -33,6 +33,9 @@ python3 -m benchmarks.utils.benchmark \
# Generate plots
python3 -m benchmarks.utils.plot --data-dir ./benchmarks/results
# Or plot only specific benchmark experiments
python3 -m benchmarks.utils.plot --data-dir ./benchmarks/results --benchmark-name my-benchmark
```
## Features
......
......@@ -4,7 +4,7 @@
import json
import re
from pathlib import Path
from typing import Dict, List, Tuple
from typing import Dict, List, Optional, Tuple
import matplotlib.pyplot as plt
......@@ -260,26 +260,40 @@ def create_efficiency_plot(
print(f"Saved efficiency plot: {output_path}")
def generate_plots(base_output_dir: Path, output_dir: Path) -> None:
def generate_plots(
base_output_dir: Path, output_dir: Path, benchmark_names: Optional[List[str]] = None
) -> None:
"""
Generate performance plots from benchmark results.
Args:
base_output_dir: Base directory containing benchmark results
output_dir: Directory to save plots
benchmark_names: Optional list of specific benchmark names to plot. If None, plots all subdirectories.
"""
print(f"Generating plots from results in {base_output_dir}")
if not base_output_dir.exists():
print(f"Results directory does not exist: {base_output_dir}")
return
# Create plots directory
output_dir.mkdir(exist_ok=True)
output_dir.mkdir(parents=True, exist_ok=True)
# Parse results for each deployment type
deployment_results = {}
# Find all subdirectories that contain benchmark results
names_set = set(benchmark_names) if benchmark_names is not None else None
for item in base_output_dir.iterdir():
if item.is_dir() and item.name != "plots":
deployment_type = item.name
# If benchmark_names is specified, only process those directories
if names_set is not None and deployment_type not in names_set:
print(f"Skipping {deployment_type} (not in specified benchmark names)")
continue
results = parse_benchmark_results(item)
if results:
deployment_results[deployment_type] = results
......@@ -288,8 +302,21 @@ def generate_plots(base_output_dir: Path, output_dir: Path) -> None:
print(f"No valid results found for {deployment_type}")
if not deployment_results:
print("No benchmark results found to plot!")
return
if benchmark_names:
available = sorted(
[
p.name
for p in base_output_dir.iterdir()
if p.is_dir() and p.name != "plots"
]
)
missing = sorted([n for n in benchmark_names if n not in available])
print(f"No benchmark results found for specified names: {benchmark_names}")
if missing:
print(f"Missing (not found under {base_output_dir}): {missing}")
print(f"Available experiments: {available}")
else:
print("No benchmark results found to plot!")
# 1. P50 Inter-token Latency vs Concurrency
p50_data = []
......@@ -416,15 +443,22 @@ if __name__ == "__main__":
parser.add_argument(
"--output-dir", help="Output directory for plots (defaults to data-dir/plots)"
)
parser.add_argument(
"--benchmark-name",
action="append",
help="Specific benchmark experiment name to plot (can be specified multiple times). If not specified, plots all subdirectories.",
)
args = parser.parse_args()
data_dir = Path(args.data_dir)
benchmark_names = args.benchmark_name if args.benchmark_name else None
if args.output_dir:
# If output dir specified, use it as base and call generate_plots
output_dir = Path(args.output_dir)
output_dir.mkdir(parents=True, exist_ok=True)
generate_plots(data_dir, output_dir)
generate_plots(data_dir, output_dir, benchmark_names)
else:
# Use data_dir as base output dir
generate_plots(data_dir, data_dir / "plots")
generate_plots(data_dir, data_dir / "plots", benchmark_names)
......@@ -134,6 +134,9 @@ python3 -m benchmarks.utils.benchmark \
```bash
# Generate plots and summary using Python plotting script
python3 -m benchmarks.utils.plot --data-dir ./benchmarks/results
# Or plot only specific benchmark experiments
python3 -m benchmarks.utils.plot --data-dir ./benchmarks/results --benchmark-name experiment-a --benchmark-name experiment-b
```
## Use Cases
......@@ -191,6 +194,28 @@ The Python plotting module:
1. **Generates** comparison plots using your custom labels in `<OUTPUT_DIR>/plots/`
2. **Creates** summary statistics and visualizations
### Plotting Options
The plotting script supports several options for customizing which experiments to visualize:
```bash
# Plot all benchmark experiments in the data directory
python3 -m benchmarks.utils.plot --data-dir ./benchmarks/results
# Plot only specific benchmark experiments
python3 -m benchmarks.utils.plot --data-dir ./benchmarks/results --benchmark-name experiment-a --benchmark-name experiment-b
# Specify custom output directory for plots
python3 -m benchmarks.utils.plot --data-dir ./benchmarks/results --output-dir ./custom-plots
```
**Available Options:**
- `--data-dir`: Directory containing benchmark results (required)
- `--benchmark-name`: Specific benchmark experiment name to plot (can be specified multiple times). Names must match subdirectory names under the data dir.
- `--output-dir`: Custom output directory for plots (defaults to data-dir/plots)
**Note**: If `--benchmark-name` is not specified, the script will plot all subdirectories found in the data directory (except the `plots` directory itself).
### Using Your Own Models and Configuration
The benchmarking framework supports any HuggingFace-compatible LLM model. Specify your model in the benchmark script's `--model` parameter. It must match the model name of the deployment. You can override the default sequence lengths (2000/256 tokens) with `--isl` and `--osl` flags if needed for your specific workload.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment