Unverified Commit 6fa718a4 authored by Reid's avatar Reid Committed by GitHub
Browse files

[Misc] Modularize CLI Argument Parsing in Benchmark Scripts (#19593)


Signed-off-by: default avatarreidliu41 <reid201711@gmail.com>
Co-authored-by: default avatarreidliu41 <reid201711@gmail.com>
parent 06be8588
......@@ -123,7 +123,7 @@ def main(args: argparse.Namespace):
save_to_pytorch_benchmark_format(args, results)
if __name__ == "__main__":
def create_argument_parser():
parser = FlexibleArgumentParser(
description="Benchmark the latency of processing a single batch of "
"requests till completion."
......@@ -171,6 +171,12 @@ if __name__ == "__main__":
# V1 enables prefix caching by default which skews the latency
# numbers. We need to disable prefix caching by default.
parser.set_defaults(enable_prefix_caching=False)
return parser
if __name__ == "__main__":
parser = create_argument_parser()
args = parser.parse_args()
if args.profile and not envs.VLLM_TORCH_PROFILER_DIR:
raise OSError(
......
......@@ -142,7 +142,7 @@ def main(args):
)
if __name__ == "__main__":
def create_argument_parser():
parser = FlexibleArgumentParser(
description="Benchmark the performance with or "
"without automatic prefix caching."
......@@ -192,5 +192,11 @@ if __name__ == "__main__":
)
parser = EngineArgs.add_cli_args(parser)
return parser
if __name__ == "__main__":
parser = create_argument_parser()
args = parser.parse_args()
main(args)
......@@ -218,7 +218,7 @@ def main(args):
)
if __name__ == "__main__":
def create_argument_parser():
parser = FlexibleArgumentParser(
description="Benchmark the performance with or without "
"automatic prefix caching."
......@@ -268,5 +268,11 @@ if __name__ == "__main__":
)
parser = EngineArgs.add_cli_args(parser)
return parser
if __name__ == "__main__":
parser = create_argument_parser()
args = parser.parse_args()
main(args)
......@@ -161,7 +161,7 @@ def main(args: argparse.Namespace):
json.dump(results, f, indent=4)
if __name__ == "__main__":
def create_argument_parser():
parser = FlexibleArgumentParser(description="Benchmark the throughput.")
parser.add_argument(
"--backend", type=str, choices=["vllm", "hf", "mii"], default="vllm"
......@@ -204,6 +204,12 @@ if __name__ == "__main__":
)
parser = EngineArgs.add_cli_args(parser)
return parser
if __name__ == "__main__":
parser = create_argument_parser()
args = parser.parse_args()
if args.tokenizer is None:
args.tokenizer = args.model
......
......@@ -875,7 +875,7 @@ def main(args: argparse.Namespace):
save_to_pytorch_benchmark_format(args, result_json, file_name)
if __name__ == "__main__":
def create_argument_parser():
parser = FlexibleArgumentParser(
description="Benchmark the online serving throughput."
)
......@@ -1225,6 +1225,10 @@ if __name__ == "__main__":
"script chooses a LoRA module at random.",
)
args = parser.parse_args()
return parser
if __name__ == "__main__":
parser = create_argument_parser()
args = parser.parse_args()
main(args)
......@@ -850,7 +850,7 @@ def main(args: argparse.Namespace):
json.dump(results, outfile, indent=4)
if __name__ == "__main__":
def create_argument_parser():
parser = FlexibleArgumentParser(
description="Benchmark the online serving throughput."
)
......@@ -1034,5 +1034,10 @@ if __name__ == "__main__":
help="Ratio of Structured Outputs requests",
)
return parser
if __name__ == "__main__":
parser = create_argument_parser()
args = parser.parse_args()
main(args)
......@@ -595,7 +595,7 @@ def validate_args(args):
)
if __name__ == "__main__":
def create_argument_parser():
parser = FlexibleArgumentParser(description="Benchmark the throughput.")
parser.add_argument(
"--backend",
......@@ -717,6 +717,12 @@ if __name__ == "__main__":
)
parser = AsyncEngineArgs.add_cli_args(parser)
return parser
if __name__ == "__main__":
parser = create_argument_parser()
args = parser.parse_args()
if args.tokenizer is None:
args.tokenizer = args.model
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment