Unverified Commit 72d3a30c authored by Woosuk Kwon's avatar Woosuk Kwon Committed by GitHub
Browse files

[Minor] Fix benchmark_latency script (#2765)

parent c9b45ade
......@@ -37,7 +37,10 @@ def main(args: argparse.Namespace):
max_tokens=args.output_len,
)
print(sampling_params)
dummy_prompt_token_ids = [[0] * args.input_len] * args.batch_size
dummy_prompt_token_ids = np.random.randint(10000,
size=(args.batch_size,
args.input_len))
dummy_prompt_token_ids = dummy_prompt_token_ids.tolist()
def run_to_completion(profile_dir: Optional[str] = None):
if profile_dir:
......@@ -71,7 +74,7 @@ def main(args: argparse.Namespace):
"."
) / "vllm_benchmark_result" / f"latency_result_{time.time()}"
print(f"Profiling (results will be saved to '{profile_dir}')...")
run_to_completion(profile_dir=args.profile_result_dir)
run_to_completion(profile_dir=profile_dir)
return
# Benchmark.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment