Unverified Commit 583a9778 authored by Simon Mo's avatar Simon Mo Committed by GitHub
Browse files

[Benchmark] Do not save detailed info to json by default (#14879)


Signed-off-by: default avatarsimon-mo <simon.mo@hey.com>
parent a73e183e
...@@ -14,7 +14,8 @@ from tqdm.asyncio import tqdm ...@@ -14,7 +14,8 @@ from tqdm.asyncio import tqdm
from transformers import (AutoTokenizer, PreTrainedTokenizer, from transformers import (AutoTokenizer, PreTrainedTokenizer,
PreTrainedTokenizerFast) PreTrainedTokenizerFast)
from vllm.model_executor.model_loader.weight_utils import get_lock # NOTE(simon): do not import vLLM here so the benchmark script
# can run without vLLM installed.
AIOHTTP_TIMEOUT = aiohttp.ClientTimeout(total=6 * 60 * 60) AIOHTTP_TIMEOUT = aiohttp.ClientTimeout(total=6 * 60 * 60)
...@@ -427,6 +428,8 @@ def get_model(pretrained_model_name_or_path: str) -> str: ...@@ -427,6 +428,8 @@ def get_model(pretrained_model_name_or_path: str) -> str:
if os.getenv('VLLM_USE_MODELSCOPE', 'False').lower() == 'true': if os.getenv('VLLM_USE_MODELSCOPE', 'False').lower() == 'true':
from modelscope import snapshot_download from modelscope import snapshot_download
from vllm.model_executor.model_loader.weight_utils import get_lock
# Use file lock to prevent multiple processes from # Use file lock to prevent multiple processes from
# downloading the same model weights at the same time. # downloading the same model weights at the same time.
with get_lock(pretrained_model_name_or_path): with get_lock(pretrained_model_name_or_path):
......
...@@ -684,6 +684,15 @@ def main(args: argparse.Namespace): ...@@ -684,6 +684,15 @@ def main(args: argparse.Namespace):
"Invalid metadata format. Please use KEY=VALUE format." "Invalid metadata format. Please use KEY=VALUE format."
) )
if not args.save_detailed:
# Remove fields with too many data points
for field in [
"input_lens", "output_lens", "ttfts", "itls",
"generated_texts", "errors"
]:
if field in result_json:
del result_json[field]
# Traffic # Traffic
result_json["request_rate"] = (args.request_rate if args.request_rate result_json["request_rate"] = (args.request_rate if args.request_rate
< float("inf") else "inf") < float("inf") else "inf")
...@@ -828,6 +837,12 @@ if __name__ == "__main__": ...@@ -828,6 +837,12 @@ if __name__ == "__main__":
action="store_true", action="store_true",
help="Specify to save benchmark results to a json file", help="Specify to save benchmark results to a json file",
) )
parser.add_argument(
"--save-detailed",
action="store_true",
help="When saving the results, whether to include per request "
"information such as response, error, ttfs, tpots, etc.",
)
parser.add_argument( parser.add_argument(
"--metadata", "--metadata",
metavar="KEY=VALUE", metavar="KEY=VALUE",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment