Fix some issues with benchmark data output (#13641)

Signed-off-by: Huy Do <huydhn@gmail.com>

Fix some issues with benchmark data output (#13641)
Signed-off-by: Huy Do <huydhn@gmail.com>
e7ef74e2 · Huy Do · GitHub · cbae7af5 · e7ef74e2 · e7ef74e2
Unverified commit e7ef74e2, authored Feb 23, 2025 by Huy Do; committed by GitHub on Feb 24, 2025.
7 changed files
--- a/.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py
+++ b/.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py
@@ -84,8 +84,13 @@ if __name__ == "__main__":
            # this result is generated via `benchmark_serving.py`
            # attach the benchmarking command to raw_result
-            with open(test_file.with_suffix(".commands")) as f:
+            try:
-                command = json.loads(f.read())
+                with open(test_file.with_suffix(".commands")) as f:
+                    command = json.loads(f.read())
+            except OSError as e:
+                print(e)
+                continue
            raw_result.update(command)
            # update the test name of this result
@@ -99,8 +104,13 @@ if __name__ == "__main__":
            # this result is generated via `benchmark_latency.py`
            # attach the benchmarking command to raw_result
-            with open(test_file.with_suffix(".commands")) as f:
+            try:
-                command = json.loads(f.read())
+                with open(test_file.with_suffix(".commands")) as f:
+                    command = json.loads(f.read())
+            except OSError as e:
+                print(e)
+                continue
            raw_result.update(command)
            # update the test name of this result
@@ -121,8 +131,13 @@ if __name__ == "__main__":
            # this result is generated via `benchmark_throughput.py`
            # attach the benchmarking command to raw_result
-            with open(test_file.with_suffix(".commands")) as f:
+            try:
-                command = json.loads(f.read())
+                with open(test_file.with_suffix(".commands")) as f:
+                    command = json.loads(f.read())
+            except OSError as e:
+                print(e)
+                continue
            raw_result.update(command)
            # update the test name of this result

--- a/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
+++ b/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
@@ -309,11 +309,14 @@ run_serving_tests() {
      new_test_name=$test_name"_qps_"$qps
+      # pass the tensor parallel size to the client so that it can be displayed
+      # on the benchmark dashboard
      client_command="python3 benchmark_serving.py \
        --save-result \
        --result-dir $RESULTS_FOLDER \
        --result-filename ${new_test_name}.json \
        --request-rate $qps \
+        --metadata "tensor_parallel_size=$tp" \
        $client_args"
      echo "Running test case $test_name with qps $qps"

--- a/.buildkite/nightly-benchmarks/tests/throughput-tests.json
+++ b/.buildkite/nightly-benchmarks/tests/throughput-tests.json
@@ -32,4 +32,4 @@
            "backend": "vllm"
        }
    }
 ]
\ No newline at end of file
--- a/benchmarks/benchmark_latency.py
+++ b/benchmarks/benchmark_latency.py
@@ -11,7 +11,7 @@ from typing import Any, Dict, List, Optional
 import numpy as np
 import torch
-from benchmark_utils import convert_to_pytorch_benchmark_format
+from benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json
 from tqdm import tqdm
 from vllm import LLM, SamplingParams
@@ -30,8 +30,7 @@ def save_to_pytorch_benchmark_format(args: argparse.Namespace,
                    for k in ["avg_latency", "percentiles"]})
    if pt_records:
        pt_file = f"{os.path.splitext(args.output_json)[0]}.pytorch.json"
-        with open(pt_file, "w") as f:
+        write_to_json(pt_file, pt_records)
-            json.dump(pt_records, f)
 def main(args: argparse.Namespace):

--- a/benchmarks/benchmark_serving.py
+++ b/benchmarks/benchmark_serving.py
@@ -56,7 +56,7 @@ try:
 except ImportError:
    from argparse import ArgumentParser as FlexibleArgumentParser
-from benchmark_utils import convert_to_pytorch_benchmark_format
+from benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json
 MILLISECONDS_TO_SECONDS_CONVERSION = 1000
@@ -841,8 +841,7 @@ def save_to_pytorch_benchmark_format(args: argparse.Namespace,
    if pt_records:
        # Don't use json suffix here as we don't want CI to pick it up
        pt_file = f"{os.path.splitext(file_name)[0]}.pytorch.json"
-        with open(pt_file, "w") as f:
+        write_to_json(pt_file, pt_records)
-            json.dump(pt_records, f)
 def main(args: argparse.Namespace):

--- a/benchmarks/benchmark_throughput.py
+++ b/benchmarks/benchmark_throughput.py
@@ -11,7 +11,7 @@ from typing import Any, Dict, List, Optional, Tuple
 import torch
 import uvloop
-from benchmark_utils import convert_to_pytorch_benchmark_format
+from benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json
 from PIL import Image
 from tqdm import tqdm
 from transformers import (AutoModelForCausalLM, AutoTokenizer,
@@ -366,8 +366,7 @@ def save_to_pytorch_benchmark_format(args: argparse.Namespace,
    if pt_records:
        # Don't use json suffix here as we don't want CI to pick it up
        pt_file = f"{os.path.splitext(args.output_json)[0]}.pytorch.json"
-        with open(pt_file, "w") as f:
+        write_to_json(pt_file, pt_records)
-            json.dump(pt_records, f)
 def main(args: argparse.Namespace):

--- a/benchmarks/benchmark_utils.py
+++ b/benchmarks/benchmark_utils.py
 # SPDX-License-Identifier: Apache-2.0
 import argparse
+import json
+import math
 import os
 from typing import Any, Dict, List
@@ -34,6 +36,34 @@ def convert_to_pytorch_benchmark_format(args: argparse.Namespace,
                "extra_info": extra_info,
            },
        }
+        tp = record["benchmark"]["extra_info"]["args"].get(
+            "tensor_parallel_size")
+        # Save tensor_parallel_size parameter if it's part of the metadata
+        if not tp and "tensor_parallel_size" in extra_info:
+            record["benchmark"]["extra_info"]["args"][
+                "tensor_parallel_size"] = extra_info["tensor_parallel_size"]
        records.append(record)
    return records
+class InfEncoder(json.JSONEncoder):
+    """JSON encoder that writes infinite floats as strings.
+
+    The standard encoder emits the bare tokens ``Infinity`` / ``-Infinity``
+    for infinite floats, which strict JSON parsers reject. This encoder
+    replaces them with the strings ``"inf"`` / ``"-inf"`` before encoding.
+    """
+
+    def clear_inf(self, o: Any):
+        """Return a copy of ``o`` with infinite floats replaced by strings.
+
+        Dicts, lists and tuples are traversed recursively (tuples come back
+        as lists, matching how JSON serializes them). Positive infinity maps
+        to ``"inf"`` and negative infinity to ``"-inf"`` so the sign is not
+        lost. Any other value is returned unchanged.
+        """
+        if isinstance(o, dict):
+            return {k: self.clear_inf(v) for k, v in o.items()}
+        if isinstance(o, (list, tuple)):
+            return [self.clear_inf(v) for v in o]
+        if isinstance(o, float) and math.isinf(o):
+            # Keep the sign: -inf must not be reported as +inf.
+            return "inf" if o > 0 else "-inf"
+        return o
+
+    def iterencode(self, o: Any, *args, **kwargs) -> Any:
+        """Encode ``o`` after replacing infinite floats via ``clear_inf``."""
+        return super().iterencode(self.clear_inf(o), *args, **kwargs)
+def write_to_json(filename: str, records: List) -> None:
+    """Serialize ``records`` as JSON into ``filename`` using ``InfEncoder``.
+
+    The file is opened in write mode, so any existing content is replaced.
+    """
+    with open(filename, "w") as json_file:
+        json.dump(records, json_file, cls=InfEncoder)