Unverified Commit 098b2d66 authored by Nick Hill, committed by GitHub
Browse files

[Benchmark] Don't default to `temperature==0` in `vllm bench serve` (#32723)


Signed-off-by: Nick Hill <nickhill123@gmail.com>
parent 8ebf271b
...@@ -160,7 +160,6 @@ async def async_request_openai_completions( ...@@ -160,7 +160,6 @@ async def async_request_openai_completions(
if request_func_input.model_name if request_func_input.model_name
else request_func_input.model, else request_func_input.model,
"prompt": request_func_input.prompt, "prompt": request_func_input.prompt,
"temperature": 0.0,
"repetition_penalty": 1.0, "repetition_penalty": 1.0,
"max_tokens": request_func_input.output_len, "max_tokens": request_func_input.output_len,
"logprobs": request_func_input.logprobs, "logprobs": request_func_input.logprobs,
...@@ -294,7 +293,6 @@ async def async_request_openai_chat_completions( ...@@ -294,7 +293,6 @@ async def async_request_openai_chat_completions(
"messages": [ "messages": [
{"role": "user", "content": content}, {"role": "user", "content": content},
], ],
"temperature": 0.0,
"max_completion_tokens": request_func_input.output_len, "max_completion_tokens": request_func_input.output_len,
"stream": True, "stream": True,
"stream_options": { "stream_options": {
...@@ -389,7 +387,6 @@ async def async_request_openai_audio( ...@@ -389,7 +387,6 @@ async def async_request_openai_audio(
"model": request_func_input.model_name "model": request_func_input.model_name
if request_func_input.model_name if request_func_input.model_name
else request_func_input.model, else request_func_input.model,
"temperature": 0.0,
"max_completion_tokens": request_func_input.output_len, "max_completion_tokens": request_func_input.output_len,
"stream": True, "stream": True,
"language": "en", "language": "en",
......
...@@ -1419,8 +1419,7 @@ def add_cli_args(parser: argparse.ArgumentParser): ...@@ -1419,8 +1419,7 @@ def add_cli_args(parser: argparse.ArgumentParser):
type=float, type=float,
default=None, default=None,
help="Temperature sampling parameter. Only has effect on " help="Temperature sampling parameter. Only has effect on "
"openai-compatible backends. If not specified, default to greedy " "openai-compatible backends.",
"decoding (i.e. temperature==0.0).",
) )
sampling_group.add_argument( sampling_group.add_argument(
"--frequency-penalty", "--frequency-penalty",
...@@ -1634,7 +1633,12 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]: ...@@ -1634,7 +1633,12 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]:
) )
if "temperature" not in sampling_params: if "temperature" not in sampling_params:
sampling_params["temperature"] = 0.0 # Default to greedy decoding. print(
"WARNING: vllm bench serve no longer sets temperature==0 (greedy) "
"in requests by default. The default will be determined on the "
"server side and can be model/API specific. "
"For the old behavior, include --temperature=0."
)
default_percentile_metrics = "ttft,tpot,itl" default_percentile_metrics = "ttft,tpot,itl"
else: else:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment