Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
334535b6
Unverified
Commit
334535b6
authored
Oct 16, 2025
by
Cyrus Leung
Committed by
GitHub
Oct 16, 2025
Browse files
[Benchmark] Show E2EL by default for pooling models (#27014)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
dcbb3f18
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
16 additions
and
5 deletions
+16
-5
vllm/benchmarks/serve.py
vllm/benchmarks/serve.py
+16
-5
No files found.
vllm/benchmarks/serve.py
View file @
334535b6
...
...
@@ -58,7 +58,7 @@ TERM_PLOTLIB_AVAILABLE = (importlib.util.find_spec("termplotlib") is not None) a
class
TaskType
(
Enum
):
GENERATION
=
"generation"
EMBEDD
ING
=
"
embedd
ing"
POOL
ING
=
"
pool
ing"
@
dataclass
...
...
@@ -1084,10 +1084,12 @@ def add_cli_args(parser: argparse.ArgumentParser):
parser
.
add_argument
(
"--percentile-metrics"
,
type
=
str
,
default
=
"ttft,tpot,itl"
,
default
=
None
,
help
=
"Comma-separated list of selected metrics to report percentils. "
"This argument specifies the metrics to report percentiles. "
'Allowed metric names are "ttft", "tpot", "itl", "e2el". '
,
'Allowed metric names are "ttft", "tpot", "itl", "e2el". '
'If not specified, defaults to "ttft,tpot,itl" for generative models '
'and "e2el" for pooling models.'
,
)
parser
.
add_argument
(
"--metric-percentiles"
,
...
...
@@ -1310,7 +1312,11 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]:
goodput_config_dict
=
check_goodput_args
(
args
)
backend
=
args
.
backend
task_type
=
TaskType
.
EMBEDDING
if
"embeddings"
in
backend
else
TaskType
.
GENERATION
task_type
=
(
TaskType
.
POOLING
if
"embeddings"
in
backend
or
"rerank"
in
backend
else
TaskType
.
GENERATION
)
# Collect the sampling parameters.
if
task_type
==
TaskType
.
GENERATION
:
...
...
@@ -1336,12 +1342,17 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]:
if
"temperature"
not
in
sampling_params
:
sampling_params
[
"temperature"
]
=
0.0
# Default to greedy decoding.
default_percentile_metrics
=
"ttft,tpot,itl"
else
:
sampling_params
=
{}
default_percentile_metrics
=
"e2el"
extra_body
=
args
.
extra_body
or
{}
extra_body
=
{
**
sampling_params
,
**
extra_body
}
percentile_metrics
:
str
=
args
.
percentile_metrics
or
default_percentile_metrics
# Avoid GC processing "static" data - reduce pause times.
gc
.
collect
()
gc
.
freeze
()
...
...
@@ -1360,7 +1371,7 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]:
burstiness
=
args
.
burstiness
,
disable_tqdm
=
args
.
disable_tqdm
,
profile
=
args
.
profile
,
selected_percentile_metrics
=
args
.
percentile_metrics
.
split
(
","
),
selected_percentile_metrics
=
percentile_metrics
.
split
(
","
),
selected_percentiles
=
[
float
(
p
)
for
p
in
args
.
metric_percentiles
.
split
(
","
)],
ignore_eos
=
args
.
ignore_eos
,
goodput_config_dict
=
goodput_config_dict
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment