Unverified Commit 20b7a8ae authored by hhzhang16's avatar hhzhang16 Committed by GitHub
Browse files

feat: remove kubectl dependencies from benchmarking (#3098)


Signed-off-by: Hannah Zhang <hannahz@nvidia.com>
Signed-off-by: hhzhang16 <54051230+hhzhang16@users.noreply.github.com>
Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
parent 3b6dbef2
......@@ -24,10 +24,10 @@ First, deploy your DynamoGraphDeployment using the [deployment documentation](..
```bash
# Port-forward your deployment to http://localhost:8000
kubectl port-forward -n <namespace> svc/<frontend-service-name> 8000:8000 &
kubectl port-forward -n <namespace> svc/<frontend-service-name> 8000:8000 > /dev/null 2>&1 &
# Run benchmark
python3 -m benchmarks.utils.benchmark --namespace <namespace> \
python3 -m benchmarks.utils.benchmark \
--input my-benchmark=http://localhost:8000 \
--model "<your-model>"
......
......@@ -4,24 +4,39 @@
# SPDX-License-Identifier: Apache-2.0
import argparse
import asyncio
import re
import sys
from typing import Tuple
from typing import Dict, Tuple
from benchmarks.utils.workflow import categorize_inputs, run_benchmark_workflow
from benchmarks.utils.workflow import run_benchmark_workflow
def validate_inputs(inputs: Dict[str, str]) -> None:
    """Validate that every benchmark input is a usable HTTP(S) endpoint.

    Args:
        inputs: Mapping of user-chosen label to endpoint URL.

    Raises:
        ValueError: If a value does not start with ``http://`` or ``https://``
            (compared case-insensitively), if the URL has no host part, or if
            a label is the reserved name ``plots`` (compared
            case-insensitively).
    """
    # Imported locally so the function stays self-contained and does not
    # depend on the module-level import block.
    from urllib.parse import urlsplit

    for label, value in inputs.items():
        if not value.lower().startswith(("http://", "https://")):
            raise ValueError(
                f"Input '{label}' must be an HTTP endpoint (starting with http:// or https://). Got: {value}"
            )

        # A bare scheme such as "http://" passes the prefix test above but
        # names no server, so require a non-empty host as well.
        try:
            host = urlsplit(value).hostname
        except ValueError:
            # urlsplit raises on malformed authorities (e.g. unbalanced
            # IPv6 brackets); report those as a missing host too.
            host = None
        if not host:
            raise ValueError(
                f"Input '{label}' must be an HTTP endpoint that includes a host (e.g. http://localhost:8000). Got: {value}"
            )

        # Validate reserved labels
        if label.lower() == "plots":
            raise ValueError(
                "Label 'plots' is reserved and cannot be used. Please choose a different label."
            )
def parse_input(input_str: str) -> Tuple[str, str]:
"""Parse input string in format key=value with additional validation"""
if "=" not in input_str:
raise ValueError(
f"Invalid input format. Expected: <label>=<manifest_path_or_endpoint>, got: {input_str}"
f"Invalid input format. Expected: <label>=<endpoint>, got: {input_str}"
)
parts = input_str.split("=", 1) # Split on first '=' only
if len(parts) != 2:
raise ValueError(
f"Invalid input format. Expected: <label>=<manifest_path_or_endpoint>, got: {input_str}"
f"Invalid input format. Expected: <label>=<endpoint>, got: {input_str}"
)
label, value = parts
......@@ -35,8 +50,6 @@ def parse_input(input_str: str) -> Tuple[str, str]:
value = value.strip()
# Validate label characters
import re
if not re.match(r"^[a-zA-Z0-9_-]+$", label):
raise ValueError(
f"Label must contain only letters, numbers, hyphens, and underscores. Invalid label: {label}"
......@@ -51,9 +64,8 @@ def main() -> int:
"--input",
action="append",
dest="inputs",
help="Input in format <label>=<manifest_path_or_endpoint>. Can be specified multiple times for comparisons.",
help="Input in format <label>=<endpoint>. Can be specified multiple times for comparisons.",
)
parser.add_argument("--namespace", required=True, help="Kubernetes namespace")
parser.add_argument("--isl", type=int, default=2000, help="Input sequence length")
parser.add_argument(
"--std",
......@@ -102,23 +114,21 @@ def main() -> int:
)
print()
endpoints, manifests = categorize_inputs(parsed_inputs)
# Validate that all inputs are HTTP endpoints
validate_inputs(parsed_inputs)
except (ValueError, FileNotFoundError) as e:
except ValueError as e:
print(f"ERROR: {e}")
return 1
# Run the benchmark workflow with the parsed inputs
asyncio.run(
run_benchmark_workflow(
namespace=args.namespace,
inputs=parsed_inputs,
isl=args.isl,
std=args.std,
osl=args.osl,
model=args.model,
output_dir=args.output_dir,
)
run_benchmark_workflow(
inputs=parsed_inputs,
isl=args.isl,
std=args.std,
osl=args.osl,
model=args.model,
output_dir=args.output_dir,
)
return 0
......
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from dataclasses import dataclass
from pathlib import Path
from typing import Callable, Dict, List, Tuple
from typing import Dict, List
from benchmarks.utils.genai import run_concurrency_sweep
from benchmarks.utils.plot import generate_plots
from deploy.utils.dynamo_deployment import DynamoDeploymentClient
@dataclass
class DeploymentConfig:
    """Settings describing a single deployment type to benchmark.

    Attributes:
        name: Human-readable name (e.g., "aggregated").
        manifest_path: Path to the deployment manifest.
        output_subdir: Subdirectory name for results (e.g., "agg").
        client_factory: Callable used to create the client.
        deploy_func: Callable used to deploy the client.
    """

    name: str
    manifest_path: str
    output_subdir: str
    client_factory: Callable
    deploy_func: Callable
def create_dynamo_client(
    namespace: str, deployment_name: str
) -> DynamoDeploymentClient:
    """Build a DynamoDeploymentClient for the given namespace and deployment name."""
    client_kwargs = {"namespace": namespace, "deployment_name": deployment_name}
    return DynamoDeploymentClient(**client_kwargs)
async def deploy_dynamo_client(
    client: DynamoDeploymentClient, manifest_path: str
) -> None:
    """Create the deployment from ``manifest_path``, then wait on its readiness call."""
    # Forwarded as ``timeout=`` to wait_for_deployment_ready; the unit is
    # defined by that method (presumably seconds — confirm against the client).
    ready_timeout = 1800
    await client.create_deployment(manifest_path)
    await client.wait_for_deployment_ready(timeout=ready_timeout)
async def teardown(client) -> None:
"""Clean up deployment and stop port forwarding"""
try:
if hasattr(client, "stop_port_forward"):
client.stop_port_forward()
await client.delete_deployment()
except Exception:
pass
def print_deployment_start(config: DeploymentConfig, output_dir: str) -> None:
    """Announce the start of a deployment benchmark: name, manifest, results path."""
    deployment_name = config.name
    print(f"🚀 Starting {deployment_name} deployment benchmark...")

    manifest = config.manifest_path
    print(f"📄 Manifest: {manifest}")

    # Results live in a per-deployment subdirectory of the output directory.
    results_dir = Path(output_dir) / config.output_subdir
    print(f"📁 Results will be saved to: {results_dir}")
def print_concurrency_start(
deployment_name: str, model: str, isl: int, osl: int, std: int
label: str, model: str, isl: int, osl: int, std: int
) -> None:
"""Print concurrency sweep start messages"""
print(f"⚙️ Starting {deployment_name} concurrency sweep!", flush=True)
print(f"⚙️ Starting {label} concurrency sweep!", flush=True)
print(
"⏱️ This may take several minutes - running through multiple concurrency levels...",
flush=True,
......@@ -65,65 +20,22 @@ def print_concurrency_start(
print(f"🎯 Model: {model} | ISL: {isl} | OSL: {osl} | StdDev: {std}")
def print_deployment_complete(config: DeploymentConfig) -> None:
    """Report that the benchmark for ``config`` finished, using its title-cased name."""
    display_name = config.name.title()
    print(f"✅ {display_name} deployment benchmark completed successfully!")
def print_deployment_skip(deployment_type: str) -> None:
    """Report that the given deployment type is being skipped because it was not specified."""
    notice = f"⏭️ Skipping {deployment_type} deployment (not specified)"
    print(notice)
async def run_single_deployment_benchmark(
    config: DeploymentConfig,
    namespace: str,
    output_dir: str,
    model: str,
    isl: int,
    osl: int,
    std: int,
) -> None:
    """Deploy one configuration, run the concurrency sweep against it, then tear it down.

    Args:
        config: Deployment description; supplies the client factory, the
            deploy function, the manifest path and the results subdirectory.
        namespace: Passed to the client factory as its first argument.
        output_dir: Base directory; results go to ``output_dir/config.output_subdir``.
        model: Model name forwarded to the sweep.
        isl: Forwarded to the sweep as ``isl``.
        osl: Forwarded to the sweep as ``osl``.
        std: Forwarded to the sweep as ``stddev``.
    """
    print_deployment_start(config, output_dir)

    # Client creation and deployment happen before the try block, so teardown
    # only runs once the deploy call has returned.
    client = config.client_factory(namespace, config.output_subdir)
    await config.deploy_func(client, config.manifest_path)

    try:
        print_concurrency_start(config.name, model, isl, osl, std)

        # Make sure the results directory exists before the sweep starts.
        results_dir = Path(output_dir) / config.output_subdir
        results_dir.mkdir(parents=True, exist_ok=True)

        frontend_url = client.port_forward_frontend(quiet=True)
        run_concurrency_sweep(
            service_url=frontend_url,
            model_name=model,
            isl=isl,
            osl=osl,
            stddev=std,
            output_dir=results_dir,
        )
    finally:
        await teardown(client)

    print_deployment_complete(config)
async def run_endpoint_benchmark(
def run_endpoint_benchmark(
label: str,
endpoint: str,
model: str,
isl: int,
osl: int,
std: int,
output_dir: str,
output_dir: Path,
) -> None:
"""Run benchmark for an existing endpoint with custom label"""
print(f"🚀 Starting benchmark of endpoint '{label}': {endpoint}")
print(f"📁 Results will be saved to: {Path(output_dir) / label}")
print_concurrency_start(f"endpoint ({label})", model, isl, osl, std)
print(f"📁 Results will be saved to: {output_dir / label}")
print_concurrency_start(label, model, isl, osl, std)
# Create output directory
(output_dir / label).mkdir(parents=True, exist_ok=True)
run_concurrency_sweep(
service_url=endpoint,
......@@ -131,122 +43,45 @@ async def run_endpoint_benchmark(
isl=isl,
osl=osl,
stddev=std,
output_dir=Path(output_dir) / label,
output_dir=output_dir / label,
)
print("✅ Endpoint benchmark completed successfully!")
def print_final_summary(output_dir: str, deployed_types: List[str]) -> None:
def print_final_summary(output_dir: Path, labels: List[str]) -> None:
"""Print final benchmark summary"""
print("📊 Generating performance plots...")
generate_plots(
base_output_dir=Path(output_dir), output_dir=Path(output_dir) / "plots"
)
print(f"📈 Plots saved to: {Path(output_dir) / 'plots'}")
print(f"📋 Summary saved to: {Path(output_dir) / 'SUMMARY.txt'}")
generate_plots(base_output_dir=output_dir, output_dir=output_dir / "plots")
print(f"📈 Plots saved to: {output_dir / 'plots'}")
print(f"📋 Summary saved to: {output_dir / 'plots' / 'SUMMARY.txt'}")
print()
print("🎉 Benchmark workflow completed successfully!")
print(f"📁 All results available at: {output_dir}")
if deployed_types:
print(f"🚀 Benchmarked deployments: {', '.join(deployed_types)}")
if labels:
print(f"🚀 Benchmarked: {', '.join(labels)}")
print(f"📊 View plots at: {Path(output_dir) / 'plots'}")
print(f"📊 View plots at: {output_dir / 'plots'}")
def categorize_inputs(inputs: Dict[str, str]) -> Tuple[Dict[str, str], Dict[str, str]]:
    """Split labelled inputs into HTTP endpoints and manifest file paths.

    Args:
        inputs: Mapping of user-chosen label to either an ``http(s)://`` URL
            or a path to a manifest file.

    Returns:
        A ``(endpoints, manifests)`` tuple of dicts keyed by label. Values
        are returned exactly as given.

    Raises:
        ValueError: If a label is the reserved name ``plots``
            (case-insensitive).
        FileNotFoundError: If a non-URL value is not an existing file.
    """
    endpoints: Dict[str, str] = {}
    manifests: Dict[str, str] = {}

    for label, value in inputs.items():
        # Validate reserved labels
        if label.lower() == "plots":
            raise ValueError(
                "Label 'plots' is reserved and cannot be used. Please choose a different label."
            )

        # URL schemes are case-insensitive, so "HTTP://host" must be treated
        # as an endpoint rather than falling through to the file-path branch.
        if value.lower().startswith(("http://", "https://")):
            endpoints[label] = value
            continue

        # Anything else should be a file path - validate it exists
        if not Path(value).is_file():
            raise FileNotFoundError(
                f"Manifest file not found for input '{label}': {value}"
            )
        manifests[label] = value

    return endpoints, manifests
def validate_dynamo_manifest(manifest_path: str) -> None:
    """Validate that the manifest is a DynamoGraphDeployment.

    The check is textual: the file must contain the literal line fragment
    ``kind: DynamoGraphDeployment``.

    Args:
        manifest_path: Path to the manifest file to inspect.

    Raises:
        FileNotFoundError: If the manifest file does not exist.
        ValueError: If the file cannot be read, or if it does not declare a
            DynamoGraphDeployment.
    """
    # Only the read is guarded. Keeping the content check outside the try
    # stops its ValueError from being caught and re-wrapped as a misleading
    # "Error reading manifest" message.
    try:
        with open(manifest_path, "r") as f:
            content = f.read()
    except FileNotFoundError as err:
        raise FileNotFoundError(f"Manifest file not found: {manifest_path}") from err
    except (OSError, ValueError, TypeError) as err:
        # Covers unreadable paths, undecodable content and invalid path
        # arguments; all were reported as ValueError before.
        raise ValueError(f"Error reading manifest {manifest_path}: {err}") from err

    # Check for DynamoGraphDeployment
    if "kind: DynamoGraphDeployment" not in content:
        raise ValueError(
            f"Manifest {manifest_path} is not a DynamoGraphDeployment. Only DynamoGraphDeployments are supported for deployment benchmarking."
        )
async def run_benchmark_workflow(
namespace: str,
def run_benchmark_workflow(
inputs: Dict[str, str],
isl: int = 200,
isl: int = 2000,
std: int = 10,
osl: int = 200,
model: str = "nvidia/Llama-3.1-8B-Instruct-FP8",
osl: int = 256,
model: str = "Qwen/Qwen3-0.6B",
output_dir: str = "benchmarks/results",
) -> None:
"""Main benchmark workflow orchestrator with dynamic inputs"""
Path(output_dir).mkdir(parents=True, exist_ok=True)
# Categorize inputs into endpoints and manifests
endpoints, manifests = categorize_inputs(inputs)
"""Main benchmark workflow orchestrator for HTTP endpoints only"""
output_dir_path = Path(output_dir)
output_dir_path.mkdir(parents=True, exist_ok=True)
# Run endpoint benchmarks
for label, endpoint in endpoints.items():
await run_endpoint_benchmark(label, endpoint, model, isl, osl, std, output_dir)
# Create deployment configurations for manifests
deployment_configs = []
for label, manifest_path in manifests.items():
# Validate that it's a DynamoGraphDeployment
validate_dynamo_manifest(manifest_path)
config = DeploymentConfig(
name=label,
manifest_path=manifest_path,
output_subdir=label,
client_factory=create_dynamo_client,
deploy_func=deploy_dynamo_client,
)
deployment_configs.append(config)
# Run benchmarks for each deployment type
deployed_labels = list(endpoints.keys())
for config in deployment_configs:
await run_single_deployment_benchmark(
config=config,
namespace=namespace,
output_dir=output_dir,
model=model,
isl=isl,
osl=osl,
std=std,
)
deployed_labels.append(config.name)
benchmarked_labels = []
for label, endpoint in inputs.items():
run_endpoint_benchmark(label, endpoint, model, isl, osl, std, output_dir_path)
benchmarked_labels.append(label)
# Generate final summary
print_final_summary(output_dir, deployed_labels)
print_final_summary(output_dir_path, benchmarked_labels)
......@@ -41,7 +41,12 @@ The framework is a Python-based wrapper around `genai-perf` that:
3. **kubectl access** - You need `kubectl` installed and configured to access your Kubernetes cluster.
4. **Benchmark dependencies** - Since benchmarks run locally, you need to install the required Python dependencies. Install them using:
4. **HTTP endpoints** - Ensure you have HTTP endpoints available for benchmarking. These can be:
- DynamoGraphDeployments exposed via HTTP endpoints
- External services (vLLM, llm-d, AIBrix, etc.)
- Any HTTP endpoint serving HuggingFace-compatible models
5. **Benchmark dependencies** - Since benchmarks run locally, you need to install the required Python dependencies. Install them using:
```bash
pip install -r deploy/utils/requirements.txt
```
......@@ -59,11 +64,11 @@ Deploy your DynamoGraphDeployments separately using the [deployment documentatio
### Step 3: Port-Forward and Benchmark Deployment A
```bash
# Port-forward the frontend service for deployment A
kubectl port-forward -n <namespace> svc/<frontend-service-name> 8000:8000 &
kubectl port-forward -n <namespace> svc/<frontend-service-name> 8000:8000 > /dev/null 2>&1 &
# Note: remember to stop the port-forward process after benchmarking.
# Benchmark deployment A using Python scripts
python3 -m benchmarks.utils.benchmark --namespace <namespace> \
python3 -m benchmarks.utils.benchmark \
--input deployment-a=http://localhost:8000 \
--model "your-model-name" \
--output-dir ./benchmarks/results
......@@ -75,10 +80,10 @@ If comparing multiple deployments, teardown deployment A and deploy deployment B
### Step 5: [If Comparative] Port-Forward and Benchmark Deployment B
```bash
# Port-forward the frontend service for deployment B
kubectl port-forward -n <namespace> <frontend-service-name> 8001:8000 &
kubectl port-forward -n <namespace> svc/<frontend-service-name> 8001:8000 > /dev/null 2>&1 &
# Benchmark deployment B using Python scripts
python3 -m benchmarks.utils.benchmark --namespace <namespace> \
python3 -m benchmarks.utils.benchmark \
--input deployment-b=http://localhost:8001 \
--model "your-model-name" \
--output-dir ./benchmarks/results
......@@ -90,35 +95,6 @@ python3 -m benchmarks.utils.benchmark --namespace <namespace> \
python3 -m benchmarks.utils.plot --data-dir ./benchmarks/results
```
## Example Commands
### Single Deployment Benchmark
```bash
# Port-forward and benchmark a single deployment
kubectl port-forward -n my-namespace svc/my-frontend-service 8000:8000 &
python3 -m benchmarks.utils.benchmark --namespace my-namespace \
--input my-deployment=http://localhost:8000 \
--model "meta-llama/Meta-Llama-3-8B"
```
### Comparative Benchmark
```bash
# Benchmark deployment A
kubectl port-forward -n my-namespace svc/agg-frontend 8000:8000 &
python3 -m benchmarks.utils.benchmark --namespace my-namespace \
--input aggregated=http://localhost:8000 \
--model "meta-llama/Meta-Llama-3-8B"
# Benchmark deployment B (different port)
kubectl port-forward -n my-namespace svc/disagg-frontend 8001:8000 &
python3 -m benchmarks.utils.benchmark --namespace my-namespace \
--input disaggregated=http://localhost:8001 \
--model "meta-llama/Meta-Llama-3-8B"
# Generate comparison plots
python3 -m benchmarks.utils.plot --data-dir ./benchmarks/results
```
## Use Cases
The benchmarking framework supports various comparative analysis scenarios:
......@@ -135,12 +111,11 @@ The benchmarking framework supports various comparative analysis scenarios:
### Command Line Options
```bash
python3 -m benchmarks.utils.benchmark --namespace NAMESPACE --input <label>=<endpoint_url> [--input <label>=<endpoint_url>]... [OPTIONS]
python3 -m benchmarks.utils.benchmark --input <label>=<endpoint_url> [--input <label>=<endpoint_url>]... [OPTIONS]
REQUIRED:
-n, --namespace NAMESPACE Kubernetes namespace
--input <label>=<endpoint_url> Benchmark input with custom label
- <label>: becomes the name/label in plots
- <label>: becomes the name/label in plots (see Important Notes for restrictions)
- <endpoint_url>: HTTP endpoint URL (e.g., http://localhost:8000)
Can be specified multiple times for comparisons
......@@ -179,35 +154,8 @@ The Python plotting module:
The benchmarking framework supports any HuggingFace-compatible LLM model. Specify your model in the benchmark script's `--model` parameter. It must match the model name of the deployment. You can override the default sequence lengths (2000/256 tokens) with `--isl` and `--osl` flags if needed for your specific workload.
### Python Script Usage
The benchmarking framework is built around Python modules that provide direct control over the benchmark workflow:
```bash
# Endpoint benchmarking
python3 -u -m benchmarks.utils.benchmark \
--input experiment-a=http://your-endpoint:8000 \
--namespace $NAMESPACE \
--isl 2000 \
--std 10 \
--osl 256 \
--output-dir $OUTPUT_DIR
# Deployment benchmarking (any combination)
python3 -u -m benchmarks.utils.benchmark \
--input experiment-a=http://localhost:8000 \
--input experiment-b=http://localhost:8005 \
--namespace my-namespace \
--isl 2000 \
--std 10 \
--osl 256 \
--output-dir ./benchmarks/results
# Generate plots separately
python3 -m benchmarks.utils.plot --data-dir $OUTPUT_DIR
```
The benchmarking framework is built around Python modules that provide direct control over the benchmark workflow. The Python benchmarking module connects to your existing endpoints, runs the benchmarks, and can generate plots. Deployment is user-managed and out of scope for this tool.
**Note**: The Python benchmarking module connects to your existing endpoints, runs the benchmarks, and can generate plots. Deployment is user-managed and out of scope for this tool.
### Comparison Limitations
The plotting system supports up to 12 different inputs in a single comparison. If you need to compare more than 12 different deployments/endpoints, consider running separate benchmark sessions or grouping related comparisons together.
......@@ -218,11 +166,13 @@ You can customize the concurrency levels using the CONCURRENCIES environment var
```bash
# Custom concurrency levels
CONCURRENCIES="1,5,20,50" python3 -m benchmarks.utils.benchmark --namespace $NAMESPACE --input my-test=http://localhost:8000
CONCURRENCIES="1,5,20,50" python3 -m benchmarks.utils.benchmark \
--input my-test=http://localhost:8000
# Or set permanently
export CONCURRENCIES="1,2,5,10,25,50,100"
python3 -m benchmarks.utils.benchmark --namespace $NAMESPACE --input test=http://localhost:8000
python3 -m benchmarks.utils.benchmark \
--input test=http://localhost:8000
```
## Understanding Your Results
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment