Unverified commit 6fa718a4, authored by Reid and committed by GitHub
Browse files

[Misc] Modularize CLI Argument Parsing in Benchmark Scripts (#19593)


Signed-off-by: reidliu41 <reid201711@gmail.com>
Co-authored-by: reidliu41 <reid201711@gmail.com>
parent 06be8588
...@@ -123,7 +123,7 @@ def main(args: argparse.Namespace): ...@@ -123,7 +123,7 @@ def main(args: argparse.Namespace):
save_to_pytorch_benchmark_format(args, results) save_to_pytorch_benchmark_format(args, results)
if __name__ == "__main__": def create_argument_parser():
parser = FlexibleArgumentParser( parser = FlexibleArgumentParser(
description="Benchmark the latency of processing a single batch of " description="Benchmark the latency of processing a single batch of "
"requests till completion." "requests till completion."
...@@ -171,6 +171,12 @@ if __name__ == "__main__": ...@@ -171,6 +171,12 @@ if __name__ == "__main__":
# V1 enables prefix caching by default which skews the latency # V1 enables prefix caching by default which skews the latency
# numbers. We need to disable prefix caching by default. # numbers. We need to disable prefix caching by default.
parser.set_defaults(enable_prefix_caching=False) parser.set_defaults(enable_prefix_caching=False)
return parser
if __name__ == "__main__":
parser = create_argument_parser()
args = parser.parse_args() args = parser.parse_args()
if args.profile and not envs.VLLM_TORCH_PROFILER_DIR: if args.profile and not envs.VLLM_TORCH_PROFILER_DIR:
raise OSError( raise OSError(
......
...@@ -142,7 +142,7 @@ def main(args): ...@@ -142,7 +142,7 @@ def main(args):
) )
if __name__ == "__main__": def create_argument_parser():
parser = FlexibleArgumentParser( parser = FlexibleArgumentParser(
description="Benchmark the performance with or " description="Benchmark the performance with or "
"without automatic prefix caching." "without automatic prefix caching."
...@@ -192,5 +192,11 @@ if __name__ == "__main__": ...@@ -192,5 +192,11 @@ if __name__ == "__main__":
) )
parser = EngineArgs.add_cli_args(parser) parser = EngineArgs.add_cli_args(parser)
return parser
if __name__ == "__main__":
parser = create_argument_parser()
args = parser.parse_args() args = parser.parse_args()
main(args) main(args)
...@@ -218,7 +218,7 @@ def main(args): ...@@ -218,7 +218,7 @@ def main(args):
) )
if __name__ == "__main__": def create_argument_parser():
parser = FlexibleArgumentParser( parser = FlexibleArgumentParser(
description="Benchmark the performance with or without " description="Benchmark the performance with or without "
"automatic prefix caching." "automatic prefix caching."
...@@ -268,5 +268,11 @@ if __name__ == "__main__": ...@@ -268,5 +268,11 @@ if __name__ == "__main__":
) )
parser = EngineArgs.add_cli_args(parser) parser = EngineArgs.add_cli_args(parser)
return parser
if __name__ == "__main__":
parser = create_argument_parser()
args = parser.parse_args() args = parser.parse_args()
main(args) main(args)
...@@ -161,7 +161,7 @@ def main(args: argparse.Namespace): ...@@ -161,7 +161,7 @@ def main(args: argparse.Namespace):
json.dump(results, f, indent=4) json.dump(results, f, indent=4)
if __name__ == "__main__": def create_argument_parser():
parser = FlexibleArgumentParser(description="Benchmark the throughput.") parser = FlexibleArgumentParser(description="Benchmark the throughput.")
parser.add_argument( parser.add_argument(
"--backend", type=str, choices=["vllm", "hf", "mii"], default="vllm" "--backend", type=str, choices=["vllm", "hf", "mii"], default="vllm"
...@@ -204,6 +204,12 @@ if __name__ == "__main__": ...@@ -204,6 +204,12 @@ if __name__ == "__main__":
) )
parser = EngineArgs.add_cli_args(parser) parser = EngineArgs.add_cli_args(parser)
return parser
if __name__ == "__main__":
parser = create_argument_parser()
args = parser.parse_args() args = parser.parse_args()
if args.tokenizer is None: if args.tokenizer is None:
args.tokenizer = args.model args.tokenizer = args.model
......
...@@ -875,7 +875,7 @@ def main(args: argparse.Namespace): ...@@ -875,7 +875,7 @@ def main(args: argparse.Namespace):
save_to_pytorch_benchmark_format(args, result_json, file_name) save_to_pytorch_benchmark_format(args, result_json, file_name)
if __name__ == "__main__": def create_argument_parser():
parser = FlexibleArgumentParser( parser = FlexibleArgumentParser(
description="Benchmark the online serving throughput." description="Benchmark the online serving throughput."
) )
...@@ -1225,6 +1225,10 @@ if __name__ == "__main__": ...@@ -1225,6 +1225,10 @@ if __name__ == "__main__":
"script chooses a LoRA module at random.", "script chooses a LoRA module at random.",
) )
args = parser.parse_args() return parser
if __name__ == "__main__":
parser = create_argument_parser()
args = parser.parse_args()
main(args) main(args)
...@@ -850,7 +850,7 @@ def main(args: argparse.Namespace): ...@@ -850,7 +850,7 @@ def main(args: argparse.Namespace):
json.dump(results, outfile, indent=4) json.dump(results, outfile, indent=4)
if __name__ == "__main__": def create_argument_parser():
parser = FlexibleArgumentParser( parser = FlexibleArgumentParser(
description="Benchmark the online serving throughput." description="Benchmark the online serving throughput."
) )
...@@ -1034,5 +1034,10 @@ if __name__ == "__main__": ...@@ -1034,5 +1034,10 @@ if __name__ == "__main__":
help="Ratio of Structured Outputs requests", help="Ratio of Structured Outputs requests",
) )
return parser
if __name__ == "__main__":
parser = create_argument_parser()
args = parser.parse_args() args = parser.parse_args()
main(args) main(args)
...@@ -595,7 +595,7 @@ def validate_args(args): ...@@ -595,7 +595,7 @@ def validate_args(args):
) )
if __name__ == "__main__": def create_argument_parser():
parser = FlexibleArgumentParser(description="Benchmark the throughput.") parser = FlexibleArgumentParser(description="Benchmark the throughput.")
parser.add_argument( parser.add_argument(
"--backend", "--backend",
...@@ -717,6 +717,12 @@ if __name__ == "__main__": ...@@ -717,6 +717,12 @@ if __name__ == "__main__":
) )
parser = AsyncEngineArgs.add_cli_args(parser) parser = AsyncEngineArgs.add_cli_args(parser)
return parser
if __name__ == "__main__":
parser = create_argument_parser()
args = parser.parse_args() args = parser.parse_args()
if args.tokenizer is None: if args.tokenizer is None:
args.tokenizer = args.model args.tokenizer = args.model
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment