update benchmark_throughput.py

17928589 · zhuwenwen · 8ae929fb · 17928589 · 17928589
Commit 17928589 authored Aug 15, 2024 by zhuwenwen
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 20 additions and 14 deletions

benchmarks/benchmark_throughput.py benchmarks/benchmark_throughput.py +10 -7

vllm/benchmark_throughput.py vllm/benchmark_throughput.py +10 -7

No files found.
--- a/benchmarks/benchmark_throughput.py
+++ b/benchmarks/benchmark_throughput.py
@@ -287,14 +287,17 @@ def main(args: argparse.Namespace):
        raise ValueError(f"Unknown backend: {args.backend}")
    total_num_tokens = sum(prompt_len + output_len
                           for _, prompt_len, output_len in requests)
-    if args.dataset is None:
-        total_out_tokens = args.output_len * args.num_prompts
-    else:
-        total_out_tokens = sum(output_len for _, _, output_len in requests) 
-    print(f"Latency: {elapsed_time:.2f} s")
-    print(f"All Throughput: {len(requests) / elapsed_time:.2f} requests/s, "
+    print(f"Throughput: {len(requests) / elapsed_time:.2f} requests/s, "
          f"{total_num_tokens / elapsed_time:.2f} tokens/s")
-    print(f"Generate Throughput: {total_out_tokens / elapsed_time:.2f} tokens/s")
+    
+    # if args.dataset is None:
+    #     total_out_tokens = args.output_len * args.num_prompts
+    # else:
+    #     total_out_tokens = sum(output_len for _, _, output_len in requests) 
+    # print(f"Latency: {elapsed_time:.2f} s")
+    # print(f"All Throughput: {len(requests) / elapsed_time:.2f} requests/s, "
+    #       f"{total_num_tokens / elapsed_time:.2f} tokens/s")
+    # print(f"Generate Throughput: {total_out_tokens / elapsed_time:.2f} tokens/s")


    # Output JSON results if specified

--- a/vllm/benchmark_throughput.py
+++ b/vllm/benchmark_throughput.py
@@ -287,14 +287,17 @@ def main(args: argparse.Namespace):
        raise ValueError(f"Unknown backend: {args.backend}")
    total_num_tokens = sum(prompt_len + output_len
                           for _, prompt_len, output_len in requests)
-    if args.dataset is None:
-        total_out_tokens = args.output_len * args.num_prompts
-    else:
-        total_out_tokens = sum(output_len for _, _, output_len in requests) 
-    print(f"Latency: {elapsed_time:.2f} s")
-    print(f"All Throughput: {len(requests) / elapsed_time:.2f} requests/s, "
+    print(f"Throughput: {len(requests) / elapsed_time:.2f} requests/s, "
          f"{total_num_tokens / elapsed_time:.2f} tokens/s")
-    print(f"Generate Throughput: {total_out_tokens / elapsed_time:.2f} tokens/s")
+    
+    # if args.dataset is None:
+    #     total_out_tokens = args.output_len * args.num_prompts
+    # else:
+    #     total_out_tokens = sum(output_len for _, _, output_len in requests) 
+    # print(f"Latency: {elapsed_time:.2f} s")
+    # print(f"All Throughput: {len(requests) / elapsed_time:.2f} requests/s, "
+    #       f"{total_num_tokens / elapsed_time:.2f} tokens/s")
+    # print(f"Generate Throughput: {total_out_tokens / elapsed_time:.2f} tokens/s")


    # Output JSON results if specified