Update benchmark script (#621)

bae9541e · Ying Sheng · a56858ba · bae9541e
Commit bae9541e authored Jul 14, 2024 by Ying Sheng
Show whitespace changes
Inline Side-by-side

Showing with 5 additions and 1 deletion

benchmark/latency_throughput/bench_one.py +5 -1

No files found.
--- a/benchmark/latency_throughput/bench_one.py
+++ b/benchmark/latency_throughput/bench_one.py
@@ -97,7 +97,10 @@ def run_one_batch_size(bs):
    print(ret)
    output_throughput = bs * max_new_tokens / latency
-    print(f"latency: {latency:.2f} s, speed: {output_throughput:.2f} token/s")
+    overall_throughput = bs * (args.input_len + max_new_tokens) / latency
+    print(f"latency: {latency:.2f} s")
+    print(f"decode throughput: {output_throughput:.2f} token/s")
+    print(f"overall throughput: {overall_throughput:.2f} token/s")
    with open("results.jsonl", "a") as fout:
        res = {
@@ -107,6 +110,7 @@ def run_one_batch_size(bs):
            "batch_size": bs,
            "latency": latency,
            "output_throughput": output_throughput,
+            "overall_throughput": overall_throughput,
        }
        fout.write(json.dumps(res) + "\n")