Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
39d90449
Unverified
Commit
39d90449
authored
Apr 13, 2025
by
Yineng Zhang
Committed by
GitHub
Apr 13, 2025
Browse files
feat: update experiment_runner (#5360)
parent
39e41138
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
81 additions
and
1 deletion
+81
-1
test/srt/configs/llama_405b.yaml
test/srt/configs/llama_405b.yaml
+28
-0
test/srt/experiment_runner.py
test/srt/experiment_runner.py
+7
-1
test/srt/parse_results.py
test/srt/parse_results.py
+46
-0
No files found.
test/srt/configs/llama_405b.yaml
0 → 100644
View file @
39d90449
# Benchmark sweep for nvidia/Llama-3.1-405B-Instruct-FP8 served with 8-way
# tensor parallelism. Every task uses fixed random input length 8192 and
# output length 1024; only --max-concurrency varies, with --num-prompts
# scaled up alongside it so each run issues multiple prompts per slot.
# All client runs append to the same llama_405b_results.jsonl file.
tasks:
  - name: sglang-8192-1024-concurrency1
    server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8
    client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 1 --num-prompts 5 --output-file llama_405b_results.jsonl
  - name: sglang-8192-1024-concurrency2
    server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8
    client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 2 --num-prompts 10 --output-file llama_405b_results.jsonl
  - name: sglang-8192-1024-concurrency4
    server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8
    client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 4 --num-prompts 20 --output-file llama_405b_results.jsonl
  - name: sglang-8192-1024-concurrency8
    server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8
    client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 8 --num-prompts 32 --output-file llama_405b_results.jsonl
  - name: sglang-8192-1024-concurrency16
    server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8
    client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 16 --num-prompts 48 --output-file llama_405b_results.jsonl
  - name: sglang-8192-1024-concurrency24
    server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8
    client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 24 --num-prompts 72 --output-file llama_405b_results.jsonl
  - name: sglang-8192-1024-concurrency32
    server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8
    client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 32 --num-prompts 96 --output-file llama_405b_results.jsonl
test/srt/experiment_runner.py
View file @
39d90449
...
...
@@ -317,6 +317,11 @@ def format_results(results: List[TaskResult]) -> str:
return
"
\n
"
.
join
(
output
)
def get_bool_env_var(name: str, default: str = "false") -> bool:
    """Return True when the environment variable *name* holds a truthy string.

    Args:
        name: Environment variable to inspect.
        default: String used when the variable is unset (defaults to "false").

    Returns:
        True iff the (case-insensitive) value is "true" or "1".
    """
    return os.getenv(name, default).lower() in ("true", "1")
def
write_in_github_step_summary
(
results
:
List
[
TaskResult
]):
"""Write formatted results to GitHub step summary."""
if
not
os
.
environ
.
get
(
"GITHUB_STEP_SUMMARY"
):
...
...
@@ -349,7 +354,8 @@ def main():
result
=
runner
.
run_task
(
config
)
results
.
append
(
result
)
write_in_github_step_summary
(
results
)
if
get_bool_env_var
(
"SGLANG_IS_IN_CI"
):
write_in_github_step_summary
(
results
)
except
Exception
as
e
:
logger
.
error
(
f
"Error:
{
e
}
"
)
raise
...
...
test/srt/parse_results.py
0 → 100644
View file @
39d90449
import
json
import
pandas
as
pd
import
argparse
import
os
from
tabulate
import
tabulate
# Parse command-line arguments
parser
=
argparse
.
ArgumentParser
(
description
=
"Parse JSONL benchmark and summarize."
)
parser
.
add_argument
(
"input_file"
,
type
=
str
,
help
=
"Path to input JSONL file"
)
args
=
parser
.
parse_args
()
input_file
=
args
.
input_file
base_name
=
os
.
path
.
splitext
(
os
.
path
.
basename
(
input_file
))[
0
]
output_file
=
f
"
{
base_name
}
_summary.csv"
fields
=
[
"max_concurrency"
,
"output_throughput"
,
"mean_ttft_ms"
,
"median_ttft_ms"
,
"p99_ttft_ms"
,
"mean_tpot_ms"
,
"median_tpot_ms"
,
"p99_tpot_ms"
,
]
# Read JSONL and parse
results
=
[]
with
open
(
input_file
,
"r"
)
as
f
:
for
line
in
f
:
data
=
json
.
loads
(
line
)
row
=
{
field
:
data
.
get
(
field
,
None
)
for
field
in
fields
}
max_conc
=
data
.
get
(
"max_concurrency"
)
out_tp
=
data
.
get
(
"output_throughput"
)
row
[
"per_user_throughput"
]
=
out_tp
/
max_conc
if
max_conc
else
None
results
.
append
(
row
)
# Convert to DataFrame
df
=
pd
.
DataFrame
(
results
)
# Save to CSV
df
.
to_csv
(
output_file
,
index
=
False
)
print
(
f
"
\n
Saved summary to:
{
output_file
}
\n
"
)
# Print ASCII table
print
(
tabulate
(
df
,
headers
=
"keys"
,
tablefmt
=
"grid"
,
floatfmt
=
".3f"
))
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment