[Benchmark] Change mii to use persistent deployment and support tensor parallel (#3628)

98a42e70 · Yile (Michael) Gu · GitHub · 0267fef5 · 98a42e70
Unverified commit 98a42e70, authored Mar 28, 2024 by Yile (Michael) Gu; committed by GitHub on Mar 28, 2024.
Show whitespace changes
Inline Side-by-side

Showing with 5 additions and 3 deletions

benchmarks/benchmark_throughput.py (+5 -3)

No files found.
--- a/benchmarks/benchmark_throughput.py
+++ b/benchmarks/benchmark_throughput.py
@@ -183,13 +183,15 @@ def run_mii(
    tensor_parallel_size: int,
    output_len: int,
 ) -> float:
-    from mii import pipeline
+    from mii import client, serve
-    llm = pipeline(model, tensor_parallel=tensor_parallel_size)
+    llm = serve(model, tensor_parallel=tensor_parallel_size)
    prompts = [prompt for prompt, _, _ in requests]
    start = time.perf_counter()
-    llm(prompts, max_new_tokens=output_len)
+    llm.generate(prompts, max_new_tokens=output_len)
    end = time.perf_counter()
+    client = client(model)
+    client.terminate_server()
    return end - start