add latency

49bfe4cb · zhuwenwen · 99d49945 · 49bfe4cb · 49bfe4cb
Commit 49bfe4cb authored Feb 11, 2025 by zhuwenwen
Show whitespace changes
Inline Side-by-side

Showing with 3 additions and 0 deletions

benchmarks/benchmark_throughput.py benchmarks/benchmark_throughput.py +1 -0

vllm/benchmarks/benchmark_throughput.py vllm/benchmarks/benchmark_throughput.py +2 -0

No files found.
--- a/benchmarks/benchmark_throughput.py
+++ b/benchmarks/benchmark_throughput.py
@@ -443,6 +443,7 @@ def main(args: argparse.Namespace):
              "following metrics are not accurate because image tokens are not"
              " counted. See vllm-project/vllm/issues/9778 for details.")
        # TODO(vllm-project/vllm/issues/9778): Count multi-modal token length.
+    print(f"Latency: {elapsed_time:.2f} s")
    print(f"Throughput: {len(requests) / elapsed_time:.2f} requests/s, "
          f"{total_num_tokens / elapsed_time:.2f} total tokens/s, "
          f"{total_output_tokens / elapsed_time:.2f} output tokens/s")

--- a/vllm/benchmarks/benchmark_throughput.py
+++ b/vllm/benchmarks/benchmark_throughput.py
+# SPDX-License-Identifier: Apache-2.0
 """Benchmark offline inference throughput."""
 import argparse
 import dataclasses
@@ -442,6 +443,7 @@ def main(args: argparse.Namespace):
              "following metrics are not accurate because image tokens are not"
              " counted. See vllm-project/vllm/issues/9778 for details.")
        # TODO(vllm-project/vllm/issues/9778): Count multi-modal token length.
+    print(f"Latency: {elapsed_time:.2f} s")
    print(f"Throughput: {len(requests) / elapsed_time:.2f} requests/s, "
          f"{total_num_tokens / elapsed_time:.2f} total tokens/s, "
          f"{total_output_tokens / elapsed_time:.2f} output tokens/s")