Unverified commit b169d5f7, authored by Duyi-Wang and committed by GitHub
Browse files

[Misc][Tools][Benchmark] Add benchmark_serving support for llama.cpp. (#18692)


Signed-off-by: Duyi-Wang <duyi.wang@intel.com>
parent f8977c23
...@@ -324,7 +324,7 @@ async def async_request_openai_completions( ...@@ -324,7 +324,7 @@ async def async_request_openai_completions(
most_recent_timestamp = timestamp most_recent_timestamp = timestamp
generated_text += text or "" generated_text += text or ""
elif usage := data.get("usage"): if usage := data.get("usage"):
output.output_tokens = usage.get("completion_tokens") output.output_tokens = usage.get("completion_tokens")
if first_chunk_received: if first_chunk_received:
output.success = True output.success = True
...@@ -611,6 +611,7 @@ ASYNC_REQUEST_FUNCS = { ...@@ -611,6 +611,7 @@ ASYNC_REQUEST_FUNCS = {
"tensorrt-llm": async_request_trt_llm, "tensorrt-llm": async_request_trt_llm,
"scalellm": async_request_openai_completions, "scalellm": async_request_openai_completions,
"sglang": async_request_openai_completions, "sglang": async_request_openai_completions,
"llama.cpp": async_request_openai_completions,
} }
OPENAI_COMPATIBLE_BACKENDS = [ OPENAI_COMPATIBLE_BACKENDS = [
......
...@@ -762,6 +762,10 @@ def main(args: argparse.Namespace): ...@@ -762,6 +762,10 @@ def main(args: argparse.Namespace):
if "temperature" not in sampling_params: if "temperature" not in sampling_params:
sampling_params["temperature"] = 0.0 # Default to greedy decoding. sampling_params["temperature"] = 0.0 # Default to greedy decoding.
if args.backend == "llama.cpp":
# Disable prompt caching in llama.cpp backend
sampling_params["cache_prompt"] = False
# Avoid GC processing "static" data - reduce pause times. # Avoid GC processing "static" data - reduce pause times.
gc.collect() gc.collect()
gc.freeze() gc.freeze()
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment