Unverified Commit 533db093 authored by Peter Pan's avatar Peter Pan Committed by GitHub
Browse files

[benchmark] add max-concurrency in result table (#21095)


Signed-off-by: default avatarPeter Pan <Peter.Pan@daocloud.io>
parent fc91da54
...@@ -413,6 +413,10 @@ async def benchmark( ...@@ -413,6 +413,10 @@ async def benchmark(
print("{s:{c}^{n}}".format(s=" Serving Benchmark Result ", n=50, c="=")) print("{s:{c}^{n}}".format(s=" Serving Benchmark Result ", n=50, c="="))
print("{:<40} {:<10}".format("Successful requests:", metrics.completed)) print("{:<40} {:<10}".format("Successful requests:", metrics.completed))
if max_concurrency is not None:
print("{:<40} {:<10}".format("Maximum request concurrency:", max_concurrency))
if request_rate != float("inf"):
print("{:<40} {:<10.2f}".format("Request rate configured (RPS):", request_rate))
print("{:<40} {:<10.2f}".format("Benchmark duration (s):", benchmark_duration)) print("{:<40} {:<10.2f}".format("Benchmark duration (s):", benchmark_duration))
print("{:<40} {:<10}".format("Total input tokens:", metrics.total_input)) print("{:<40} {:<10}".format("Total input tokens:", metrics.total_input))
print("{:<40} {:<10}".format("Total generated tokens:", metrics.total_output)) print("{:<40} {:<10}".format("Total generated tokens:", metrics.total_output))
......
...@@ -555,6 +555,10 @@ async def benchmark( ...@@ -555,6 +555,10 @@ async def benchmark(
print("{s:{c}^{n}}".format(s=" Serving Benchmark Result ", n=50, c="=")) print("{s:{c}^{n}}".format(s=" Serving Benchmark Result ", n=50, c="="))
print("{:<40} {:<10}".format("Successful requests:", metrics.completed)) print("{:<40} {:<10}".format("Successful requests:", metrics.completed))
if max_concurrency is not None:
print("{:<40} {:<10}".format("Maximum request concurrency:", max_concurrency))
if request_rate != float("inf"):
print("{:<40} {:<10.2f}".format("Request rate configured (RPS):", request_rate))
print("{:<40} {:<10.2f}".format("Benchmark duration (s):", benchmark_duration)) print("{:<40} {:<10.2f}".format("Benchmark duration (s):", benchmark_duration))
print("{:<40} {:<10}".format("Total input tokens:", metrics.total_input)) print("{:<40} {:<10}".format("Total input tokens:", metrics.total_input))
print("{:<40} {:<10}".format("Total generated tokens:", metrics.total_output)) print("{:<40} {:<10}".format("Total generated tokens:", metrics.total_output))
......
...@@ -486,6 +486,12 @@ async def benchmark( ...@@ -486,6 +486,12 @@ async def benchmark(
print("{s:{c}^{n}}".format(s=' Serving Benchmark Result ', n=50, c='=')) print("{s:{c}^{n}}".format(s=' Serving Benchmark Result ', n=50, c='='))
print("{:<40} {:<10}".format("Successful requests:", metrics.completed)) print("{:<40} {:<10}".format("Successful requests:", metrics.completed))
if max_concurrency is not None:
print("{:<40} {:<10}".format("Maximum request concurrency:",
max_concurrency))
if request_rate != float('inf'):
print("{:<40} {:<10.2f}".format("Request rate configured (RPS):",
request_rate ))
print("{:<40} {:<10.2f}".format("Benchmark duration (s):", print("{:<40} {:<10.2f}".format("Benchmark duration (s):",
benchmark_duration)) benchmark_duration))
print("{:<40} {:<10}".format("Total input tokens:", metrics.total_input)) print("{:<40} {:<10}".format("Total input tokens:", metrics.total_input))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment