Commit 49bfe4cb authored by zhuwenwen
Browse files

add latency

parent 99d49945
...@@ -443,6 +443,7 @@ def main(args: argparse.Namespace): ...@@ -443,6 +443,7 @@ def main(args: argparse.Namespace):
"following metrics are not accurate because image tokens are not" "following metrics are not accurate because image tokens are not"
" counted. See vllm-project/vllm/issues/9778 for details.") " counted. See vllm-project/vllm/issues/9778 for details.")
# TODO(vllm-project/vllm/issues/9778): Count multi-modal token length. # TODO(vllm-project/vllm/issues/9778): Count multi-modal token length.
print(f"Latency: {elapsed_time:.2f} s")
print(f"Throughput: {len(requests) / elapsed_time:.2f} requests/s, " print(f"Throughput: {len(requests) / elapsed_time:.2f} requests/s, "
f"{total_num_tokens / elapsed_time:.2f} total tokens/s, " f"{total_num_tokens / elapsed_time:.2f} total tokens/s, "
f"{total_output_tokens / elapsed_time:.2f} output tokens/s") f"{total_output_tokens / elapsed_time:.2f} output tokens/s")
......
# SPDX-License-Identifier: Apache-2.0
"""Benchmark offline inference throughput.""" """Benchmark offline inference throughput."""
import argparse import argparse
import dataclasses import dataclasses
...@@ -442,6 +443,7 @@ def main(args: argparse.Namespace): ...@@ -442,6 +443,7 @@ def main(args: argparse.Namespace):
"following metrics are not accurate because image tokens are not" "following metrics are not accurate because image tokens are not"
" counted. See vllm-project/vllm/issues/9778 for details.") " counted. See vllm-project/vllm/issues/9778 for details.")
# TODO(vllm-project/vllm/issues/9778): Count multi-modal token length. # TODO(vllm-project/vllm/issues/9778): Count multi-modal token length.
print(f"Latency: {elapsed_time:.2f} s")
print(f"Throughput: {len(requests) / elapsed_time:.2f} requests/s, " print(f"Throughput: {len(requests) / elapsed_time:.2f} requests/s, "
f"{total_num_tokens / elapsed_time:.2f} total tokens/s, " f"{total_num_tokens / elapsed_time:.2f} total tokens/s, "
f"{total_output_tokens / elapsed_time:.2f} output tokens/s") f"{total_output_tokens / elapsed_time:.2f} output tokens/s")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment