Unverified commit 98a42e70, authored by Yile (Michael) Gu and committed by GitHub
Browse files

[Benchmark] Change mii to use persistent deployment and support tensor parallel (#3628)

parent 0267fef5
......@@ -183,13 +183,15 @@ def run_mii(
tensor_parallel_size: int,
output_len: int,
) -> float:
from mii import pipeline
llm = pipeline(model, tensor_parallel=tensor_parallel_size)
from mii import client, serve
llm = serve(model, tensor_parallel=tensor_parallel_size)
prompts = [prompt for prompt, _, _ in requests]
start = time.perf_counter()
llm(prompts, max_new_tokens=output_len)
llm.generate(prompts, max_new_tokens=output_len)
end = time.perf_counter()
client = client(model)
client.terminate_server()
return end - start
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment