Unverified commit 4d4297e8, authored by lkchen, committed by GitHub
Browse files

[Bench] Split serve.py:main into sync/async versions (#22405)


Signed-off-by: Linkun <github@lkchen.net>
parent 2a4c8255
...@@ -948,7 +948,10 @@ def add_cli_args(parser: argparse.ArgumentParser): ...@@ -948,7 +948,10 @@ def add_cli_args(parser: argparse.ArgumentParser):
) )
def main(args: argparse.Namespace): def main(args: argparse.Namespace) -> dict[str, Any]:
return asyncio.run(main_async(args))
async def main_async(args: argparse.Namespace) -> dict[str, Any]:
print(args) print(args)
random.seed(args.seed) random.seed(args.seed)
np.random.seed(args.seed) np.random.seed(args.seed)
...@@ -1025,8 +1028,7 @@ def main(args: argparse.Namespace): ...@@ -1025,8 +1028,7 @@ def main(args: argparse.Namespace):
gc.collect() gc.collect()
gc.freeze() gc.freeze()
benchmark_result = asyncio.run( benchmark_result = await benchmark(
benchmark(
endpoint_type=args.endpoint_type, endpoint_type=args.endpoint_type,
api_url=api_url, api_url=api_url,
base_url=base_url, base_url=base_url,
...@@ -1052,62 +1054,62 @@ def main(args: argparse.Namespace): ...@@ -1052,62 +1054,62 @@ def main(args: argparse.Namespace):
ramp_up_start_rps=args.ramp_up_start_rps, ramp_up_start_rps=args.ramp_up_start_rps,
ramp_up_end_rps=args.ramp_up_end_rps, ramp_up_end_rps=args.ramp_up_end_rps,
ready_check_timeout_sec=args.ready_check_timeout_sec, ready_check_timeout_sec=args.ready_check_timeout_sec,
)) )
# Save config and results to json # Save config and results to json
if args.save_result or args.append_result: result_json: dict[str, Any] = {}
result_json: dict[str, Any] = {}
# Setup
# Setup current_dt = datetime.now().strftime("%Y%m%d-%H%M%S")
current_dt = datetime.now().strftime("%Y%m%d-%H%M%S") result_json["date"] = current_dt
result_json["date"] = current_dt result_json["endpoint_type"] = args.endpoint_type
result_json["endpoint_type"] = args.endpoint_type result_json["label"] = label
result_json["label"] = label result_json["model_id"] = model_id
result_json["model_id"] = model_id result_json["tokenizer_id"] = tokenizer_id
result_json["tokenizer_id"] = tokenizer_id result_json["num_prompts"] = args.num_prompts
result_json["num_prompts"] = args.num_prompts
# Metadata
# Metadata if args.metadata:
if args.metadata: for item in args.metadata:
for item in args.metadata: if "=" in item:
if "=" in item: kvstring = item.split("=")
kvstring = item.split("=") result_json[kvstring[0].strip()] = kvstring[1].strip()
result_json[kvstring[0].strip()] = kvstring[1].strip() else:
else: raise ValueError(
raise ValueError( "Invalid metadata format. Please use KEY=VALUE format."
"Invalid metadata format. Please use KEY=VALUE format." )
)
# Traffic
result_json["request_rate"] = (args.request_rate if args.request_rate
< float("inf") else "inf")
result_json["burstiness"] = args.burstiness
result_json["max_concurrency"] = args.max_concurrency
if args.ramp_up_strategy is not None: # Traffic
result_json["ramp_up_strategy"] = args.ramp_up_strategy result_json["request_rate"] = (args.request_rate if args.request_rate
result_json["ramp_up_start_rps"] = args.ramp_up_start_rps < float("inf") else "inf")
result_json["ramp_up_end_rps"] = args.ramp_up_end_rps result_json["burstiness"] = args.burstiness
result_json["max_concurrency"] = args.max_concurrency
# Merge with benchmark result
result_json = {**result_json, **benchmark_result} if args.ramp_up_strategy is not None:
result_json["ramp_up_strategy"] = args.ramp_up_strategy
if not args.save_detailed: result_json["ramp_up_start_rps"] = args.ramp_up_start_rps
# Remove fields with too many data points result_json["ramp_up_end_rps"] = args.ramp_up_end_rps
for field in [
"input_lens", # Merge with benchmark result
"output_lens", result_json = {**result_json, **benchmark_result}
"ttfts",
"itls", if not args.save_detailed:
"generated_texts", # Remove fields with too many data points
"errors", for field in [
]: "input_lens",
if field in result_json: "output_lens",
del result_json[field] "ttfts",
if field in benchmark_result: "itls",
del benchmark_result[field] "generated_texts",
"errors",
]:
if field in result_json:
del result_json[field]
if field in benchmark_result:
del benchmark_result[field]
# Save to file # Save to file
if args.save_result or args.append_result:
base_model_id = model_id.split("/")[-1] base_model_id = model_id.split("/")[-1]
max_concurrency_str = (f"-concurrency{args.max_concurrency}" max_concurrency_str = (f"-concurrency{args.max_concurrency}"
if args.max_concurrency is not None else "") if args.max_concurrency is not None else "")
...@@ -1129,3 +1131,5 @@ def main(args: argparse.Namespace): ...@@ -1129,3 +1131,5 @@ def main(args: argparse.Namespace):
outfile.write("\n") outfile.write("\n")
json.dump(result_json, outfile) json.dump(result_json, outfile)
save_to_pytorch_benchmark_format(args, result_json, file_name) save_to_pytorch_benchmark_format(args, result_json, file_name)
return result_json
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment