"tests/vscode:/vscode.git/clone" did not exist on "328cbb2773d93d45d18dfd383d631de4fa276e69"
Unverified Commit d8714530 authored by DearPlanet's avatar DearPlanet Committed by GitHub
Browse files

[Misc]Add param max-model-len in benchmark_latency.py (#5629)

parent 7d46c8d3
...@@ -29,6 +29,7 @@ def main(args: argparse.Namespace): ...@@ -29,6 +29,7 @@ def main(args: argparse.Namespace):
tensor_parallel_size=args.tensor_parallel_size, tensor_parallel_size=args.tensor_parallel_size,
trust_remote_code=args.trust_remote_code, trust_remote_code=args.trust_remote_code,
dtype=args.dtype, dtype=args.dtype,
max_model_len=args.max_model_len,
enforce_eager=args.enforce_eager, enforce_eager=args.enforce_eager,
kv_cache_dtype=args.kv_cache_dtype, kv_cache_dtype=args.kv_cache_dtype,
quantization_param_path=args.quantization_param_path, quantization_param_path=args.quantization_param_path,
...@@ -150,6 +151,12 @@ if __name__ == '__main__': ...@@ -150,6 +151,12 @@ if __name__ == '__main__':
parser.add_argument('--trust-remote-code', parser.add_argument('--trust-remote-code',
action='store_true', action='store_true',
help='trust remote code from huggingface') help='trust remote code from huggingface')
parser.add_argument(
'--max-model-len',
type=int,
default=None,
help='Maximum length of a sequence (including prompt and output). '
'If None, will be derived from the model.')
parser.add_argument( parser.add_argument(
'--dtype', '--dtype',
type=str, type=str,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment