Unverified commit a395a638, authored by zifeitong, committed by GitHub
Browse files

[Misc] Use public API in benchmark_throughput (#4300)

parent 2768884a
...@@ -103,25 +103,22 @@ def run_vllm( ...@@ -103,25 +103,22 @@ def run_vllm(
) )
# Add the requests to the engine. # Add the requests to the engine.
prompts = []
sampling_params = []
for prompt, _, output_len in requests: for prompt, _, output_len in requests:
sampling_params = SamplingParams( prompts.append(prompt)
n=n, sampling_params.append(
temperature=0.0 if use_beam_search else 1.0, SamplingParams(
top_p=1.0, n=n,
use_beam_search=use_beam_search, temperature=0.0 if use_beam_search else 1.0,
ignore_eos=True, top_p=1.0,
max_tokens=output_len, use_beam_search=use_beam_search,
) ignore_eos=True,
# FIXME(woosuk): Do not use internal method. max_tokens=output_len,
llm._add_request( ))
prompt=prompt,
prompt_token_ids=None,
sampling_params=sampling_params,
)
start = time.perf_counter() start = time.perf_counter()
# FIXME(woosuk): Do not use internal method. llm.generate(prompts, sampling_params, use_tqdm=True)
llm._run_engine(use_tqdm=True)
end = time.perf_counter() end = time.perf_counter()
return end - start return end - start
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment