Support outputting details for bench_serving (#6107)

6d95a35a · fzyzcjy · GitHub · 01d2838c · 6d95a35a
Unverified Commit 6d95a35a authored May 18, 2025 by fzyzcjy Committed by GitHub May 17, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 22 additions and 13 deletions

python/sglang/bench_serving.py python/sglang/bench_serving.py +22 -13

No files found.
--- a/python/sglang/bench_serving.py
+++ b/python/sglang/bench_serving.py
@@ -1380,21 +1380,24 @@ async def benchmark(
        else:
            output_file_name = f"{args.backend}_{now}_{args.num_prompts}_sharegpt.jsonl"
+    result_details = {
+        "input_lens": [output.prompt_len for output in outputs],
+        "output_lens": output_lens,
+        "ttfts": [output.ttft for output in outputs],
+        "itls": [output.itl for output in outputs],
+        "generated_texts": [output.generated_text for output in outputs],
+        "errors": [output.error for output in outputs],
+    }
    # Append results to a JSONL file
    with open(output_file_name, "a") as file:
-        file.write(json.dumps(result) + "\n")
+        if args.output_details:
+            result_for_dump = result | result_details
-    result.update(
+        else:
-        {
+            result_for_dump = result
-            "input_lens": [output.prompt_len for output in outputs],
+        file.write(json.dumps(result_for_dump) + "\n")
-            "output_lens": output_lens,
-            "ttfts": [output.ttft for output in outputs],
+    return result | result_details
-            "itls": [output.itl for output in outputs],
-            "generated_texts": [output.generated_text for output in outputs],
-            "errors": [output.error for output in outputs],
-        }
-    )
-    return result
 def check_chat_template(model_path):
@@ -1424,6 +1427,9 @@ def run_benchmark(args_: argparse.Namespace):
    if not hasattr(args, "warmup_requests"):
        args.warmup_requests = 1
+    if not hasattr(args, "output_details"):
+        args.output_details = False
    print(f"benchmark_args={args}")
    # Set global environments
@@ -1668,6 +1674,9 @@ if __name__ == "__main__":
        "if the server is not processing requests fast enough to keep up.",
    )
    parser.add_argument("--output-file", type=str, help="Output JSONL file name.")
+    parser.add_argument(
+        "--output-details", action="store_true", help="Output details of benchmarking."
+    )
    parser.add_argument(
        "--disable-tqdm",
        action="store_true",