Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
098b2d66
Unverified
Commit
098b2d66
authored
Jan 22, 2026
by
Nick Hill
Committed by
GitHub
Jan 22, 2026
Browse files
[Benchmark] Don't default to `temperature==0` in `vllm bench serve` (#32723)
Signed-off-by:
Nick Hill
<
nickhill123@gmail.com
>
parent
8ebf271b
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
7 additions
and
6 deletions
+7
-6
vllm/benchmarks/lib/endpoint_request_func.py
vllm/benchmarks/lib/endpoint_request_func.py
+0
-3
vllm/benchmarks/serve.py
vllm/benchmarks/serve.py
+7
-3
No files found.
vllm/benchmarks/lib/endpoint_request_func.py
View file @
098b2d66
...
...
@@ -160,7 +160,6 @@ async def async_request_openai_completions(
if
request_func_input
.
model_name
else
request_func_input
.
model
,
"prompt"
:
request_func_input
.
prompt
,
"temperature"
:
0.0
,
"repetition_penalty"
:
1.0
,
"max_tokens"
:
request_func_input
.
output_len
,
"logprobs"
:
request_func_input
.
logprobs
,
...
...
@@ -294,7 +293,6 @@ async def async_request_openai_chat_completions(
"messages"
:
[
{
"role"
:
"user"
,
"content"
:
content
},
],
"temperature"
:
0.0
,
"max_completion_tokens"
:
request_func_input
.
output_len
,
"stream"
:
True
,
"stream_options"
:
{
...
...
@@ -389,7 +387,6 @@ async def async_request_openai_audio(
"model"
:
request_func_input
.
model_name
if
request_func_input
.
model_name
else
request_func_input
.
model
,
"temperature"
:
0.0
,
"max_completion_tokens"
:
request_func_input
.
output_len
,
"stream"
:
True
,
"language"
:
"en"
,
...
...
vllm/benchmarks/serve.py
View file @
098b2d66
...
...
@@ -1419,8 +1419,7 @@ def add_cli_args(parser: argparse.ArgumentParser):
type
=
float
,
default
=
None
,
help
=
"Temperature sampling parameter. Only has effect on "
"openai-compatible backends. If not specified, default to greedy "
"decoding (i.e. temperature==0.0)."
,
"openai-compatible backends."
,
)
sampling_group
.
add_argument
(
"--frequency-penalty"
,
...
...
@@ -1634,7 +1633,12 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]:
)
if
"temperature"
not
in
sampling_params
:
sampling_params
[
"temperature"
]
=
0.0
# Default to greedy decoding.
print
(
"WARNING: vllm bench serve no longer sets temperature==0 (greedy) "
"in requests by default. The default will be determined on the "
"server side and can be model/API specific. "
"For the old behavior, include --temperature=0."
)
default_percentile_metrics
=
"ttft,tpot,itl"
else
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment