Unverified commit 72d3a30c, authored by Woosuk Kwon, committed by GitHub
Browse files

[Minor] Fix benchmark_latency script (#2765)

parent c9b45ade
@@ -37,7 +37,10 @@ def main(args: argparse.Namespace):
         max_tokens=args.output_len,
     )
     print(sampling_params)
-    dummy_prompt_token_ids = [[0] * args.input_len] * args.batch_size
+    dummy_prompt_token_ids = np.random.randint(10000,
+                                               size=(args.batch_size,
+                                                     args.input_len))
+    dummy_prompt_token_ids = dummy_prompt_token_ids.tolist()
 
     def run_to_completion(profile_dir: Optional[str] = None):
         if profile_dir:
@@ -71,7 +74,7 @@ def main(args: argparse.Namespace):
                 "."
             ) / "vllm_benchmark_result" / f"latency_result_{time.time()}"
         print(f"Profiling (results will be saved to '{profile_dir}')...")
-        run_to_completion(profile_dir=args.profile_result_dir)
+        run_to_completion(profile_dir=profile_dir)
         return
 
     # Benchmark.
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment