Unverified commit 98a42e70, authored by Yile (Michael) Gu and committed by GitHub
Browse files

[Benchmark] Change mii to use persistent deployment and support tensor parallel (#3628)

parent 0267fef5
...@@ -183,13 +183,15 @@ def run_mii( ...@@ -183,13 +183,15 @@ def run_mii(
tensor_parallel_size: int, tensor_parallel_size: int,
output_len: int, output_len: int,
) -> float: ) -> float:
from mii import pipeline from mii import client, serve
llm = pipeline(model, tensor_parallel=tensor_parallel_size) llm = serve(model, tensor_parallel=tensor_parallel_size)
prompts = [prompt for prompt, _, _ in requests] prompts = [prompt for prompt, _, _ in requests]
start = time.perf_counter() start = time.perf_counter()
llm(prompts, max_new_tokens=output_len) llm.generate(prompts, max_new_tokens=output_len)
end = time.perf_counter() end = time.perf_counter()
client = client(model)
client.terminate_server()
return end - start return end - start
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment