[Misc] Use public API in benchmark_throughput (#4300)

a395a638 · zifeitong · GitHub · 2768884a · a395a638
Unverified Commit a395a638 authored Apr 24, 2024 by zifeitong Committed by GitHub Apr 24, 2024
Hide whitespace changes
Inline Side-by-side

Showing with 13 additions and 16 deletions

benchmarks/benchmark_throughput.py benchmarks/benchmark_throughput.py +13 -16

No files found.
--- a/benchmarks/benchmark_throughput.py
+++ b/benchmarks/benchmark_throughput.py
@@ -103,25 +103,22 @@ def run_vllm(
    )

    # Add the requests to the engine.
+    prompts = []
+    sampling_params = []
    for prompt, _, output_len in requests:
-        sampling_params = SamplingParams(
-            n=n,
-            temperature=0.0 if use_beam_search else 1.0,
-            top_p=1.0,
-            use_beam_search=use_beam_search,
-            ignore_eos=True,
-            max_tokens=output_len,
-        )
-        # FIXME(woosuk): Do not use internal method.
-        llm._add_request(
-            prompt=prompt,
-            prompt_token_ids=None,
-            sampling_params=sampling_params,
-        )
+        prompts.append(prompt)
+        sampling_params.append(
+            SamplingParams(
+                n=n,
+                temperature=0.0 if use_beam_search else 1.0,
+                top_p=1.0,
+                use_beam_search=use_beam_search,
+                ignore_eos=True,
+                max_tokens=output_len,
+            ))

    start = time.perf_counter()
-    # FIXME(woosuk): Do not use internal method.
-    llm._run_engine(use_tqdm=True)
+    llm.generate(prompts, sampling_params, use_tqdm=True)
    end = time.perf_counter()
    return end - start