Unverified Commit fa7ccb33 authored by zhyncs, committed by GitHub
Browse files

feat: add e2e latency (#704)

parent 26868443
......@@ -264,6 +264,8 @@ class BenchmarkMetrics:
median_itl_ms: float
std_itl_ms: float
p99_itl_ms: float
mean_e2e_latency_ms: float
median_e2e_latency_ms: float
default_sharegpt_path = "ShareGPT_V3_unfiltered_cleaned_split.json"
......@@ -467,6 +469,7 @@ def calculate_metrics(
itls: List[float] = []
tpots: List[float] = []
ttfts: List[float] = []
e2e_latencies: List[float] = []
for i in range(len(outputs)):
if outputs[i].success:
output_len = outputs[i].output_len
......@@ -480,6 +483,9 @@ def calculate_metrics(
tpots.append((outputs[i].latency - outputs[i].ttft) / (output_len - 1))
itls += outputs[i].itl
ttfts.append(outputs[i].ttft)
e2e_latencies.append(outputs[i].latency)
completed += 1
else:
output_lens.append(0)
......@@ -513,6 +519,8 @@ def calculate_metrics(
median_itl_ms=np.median(itls or 0) * 1000,
std_itl_ms=np.std(itls or 0) * 1000,
p99_itl_ms=np.percentile(itls or 0, 99) * 1000,
mean_e2e_latency_ms=np.mean(e2e_latencies) * 1000,
median_e2e_latency_ms=np.median(e2e_latencies) * 1000,
)
return metrics, output_lens
......@@ -611,6 +619,15 @@ async def benchmark(
"Output token throughput (tok/s):", metrics.output_throughput
)
)
print("{s:{c}^{n}}".format(s="End-to-End Latency", n=50, c="-"))
print(
"{:<40} {:<10.2f}".format("Mean E2E Latency (ms):", metrics.mean_e2e_latency_ms)
)
print(
"{:<40} {:<10.2f}".format(
"Median E2E Latency (ms):", metrics.median_e2e_latency_ms
)
)
print("{s:{c}^{n}}".format(s="Time to First Token", n=50, c="-"))
print("{:<40} {:<10.2f}".format("Mean TTFT (ms):", metrics.mean_ttft_ms))
print("{:<40} {:<10.2f}".format("Median TTFT (ms):", metrics.median_ttft_ms))
......@@ -639,6 +656,8 @@ async def benchmark(
"total_input": metrics.total_input,
"total_output": metrics.total_output,
"total_output_retokenized": metrics.total_output_retokenized,
"mean_e2e_latency": metrics.mean_e2e_latency_ms,
"median_e2e_latency": metrics.median_e2e_latency_ms,
"median_ttft": metrics.median_ttft_ms,
"median_itl": metrics.median_itl_ms,
"output_token_throughput": metrics.output_throughput,
......@@ -693,6 +712,8 @@ async def benchmark(
"itls": [output.itl for output in outputs],
"generated_texts": [output.generated_text for output in outputs],
"errors": [output.error for output in outputs],
"mean_e2e_latency_ms": metrics.mean_e2e_latency_ms,
"median_e2e_latency_ms": metrics.median_e2e_latency_ms,
}
return result
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment