Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
9ca66ecc
Unverified
Commit
9ca66ecc
authored
Jan 30, 2026
by
Huy Do
Committed by
GitHub
Jan 30, 2026
Browse files
Indicate compile mode in the benchmark results (#32990)
Signed-off-by:
Huy Do
<
huydhn@gmail.com
>
parent
c3a9752b
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
42 additions
and
3 deletions
+42
-3
.buildkite/performance-benchmarks/scripts/run-performance-benchmarks.sh
...formance-benchmarks/scripts/run-performance-benchmarks.sh
+8
-3
vllm/benchmarks/lib/utils.py
vllm/benchmarks/lib/utils.py
+34
-0
No files found.
.buildkite/performance-benchmarks/scripts/run-performance-benchmarks.sh
View file @
9ca66ecc
...
@@ -393,6 +393,11 @@ run_serving_tests() {
...
@@ -393,6 +393,11 @@ run_serving_tests() {
fi
fi
fi
fi
# save the compilation mode and optimization level on the serving results
# whenever they are set
compilation_config_mode
=
$(
echo
"
$server_params
"
| jq
-r
'."compilation_config.mode" // empty'
)
optimization_level
=
$(
echo
"
$server_params
"
| jq
-r
'.optimization_level // empty'
)
# iterate over different QPS
# iterate over different QPS
for
qps
in
$qps_list
;
do
for
qps
in
$qps_list
;
do
# remove the surrounding single quote from qps
# remove the surrounding single quote from qps
...
@@ -406,15 +411,15 @@ run_serving_tests() {
...
@@ -406,15 +411,15 @@ run_serving_tests() {
for
max_concurrency
in
$max_concurrency_list
;
do
for
max_concurrency
in
$max_concurrency_list
;
do
new_test_name
=
$test_name
"_qps_"
$qps
"_concurrency_"
$max_concurrency
new_test_name
=
$test_name
"_qps_"
$qps
"_concurrency_"
$max_concurrency
echo
" new test name
$new_test_name
"
echo
" new test name
$new_test_name
"
# pass the tensor parallel size
to
the c
lient so that it can be displayed
# pass the tensor parallel size
,
the c
ompilation mode, and the optimization
# on the benchmark dashboard
#
level to the client so that they can be used
on the benchmark dashboard
client_command
=
"vllm bench serve
\
client_command
=
"vllm bench serve
\
--save-result
\
--save-result
\
--result-dir
$RESULTS_FOLDER
\
--result-dir
$RESULTS_FOLDER
\
--result-filename
${
new_test_name
}
.json
\
--result-filename
${
new_test_name
}
.json
\
--request-rate
$qps
\
--request-rate
$qps
\
--max-concurrency
$max_concurrency
\
--max-concurrency
$max_concurrency
\
--metadata
"
tensor_parallel_size
=
$tp
"
\
--metadata tensor_parallel_size=
$tp
compilation_config.mode=
$compilation_config_mode
optimization_level=
$optimization_level
\
$client_args
$client_remote_args
"
$client_args
$client_remote_args
"
echo
"Running test case
$test_name
with qps
$qps
"
echo
"Running test case
$test_name
with qps
$qps
"
...
...
vllm/benchmarks/lib/utils.py
View file @
9ca66ecc
...
@@ -8,6 +8,32 @@ import os
...
@@ -8,6 +8,32 @@ import os
from
typing
import
Any
from
typing
import
Any
def extract_field(
    args: argparse.Namespace, extra_info: dict[str, Any], field_name: str
) -> Any:
    """Return the value of ``field_name`` from ``extra_info`` or ``args``.

    ``extra_info`` takes precedence: when it contains ``field_name`` as a key
    (the whole dotted name, e.g. ``"compilation_config.mode"``), that value is
    returned unchanged. Otherwise the dotted name is resolved as a chain of
    attributes on ``args`` (e.g. ``args.compilation_config.mode``).

    Args:
        args: Parsed command-line arguments to fall back on.
        extra_info: Extra metadata; checked first using the full field name.
        field_name: Field to look up; dots separate nested attributes.

    Returns:
        The value found. Its type is whatever was stored, not necessarily a
        string. An empty string is returned when any attribute along the
        dotted path is missing.
    """
    if field_name in extra_info:
        return extra_info[field_name]

    # A private sentinel distinguishes "attribute missing" from an attribute
    # whose value happens to be None or another falsy value.
    missing = object()

    v: Any = args
    # For example, args.compilation_config.mode
    for nested_field in field_name.split("."):
        v = getattr(v, nested_field, missing)
        if v is missing:
            return ""
    return v
def use_compile(args: argparse.Namespace, extra_info: dict[str, Any]) -> bool:
    """
    Check if the benchmark is run with torch.compile

    The run is reported as NOT using compile when any of these hold:

    * the ``compilation_config.mode`` field (looked up via ``extract_field``)
      equals the string ``"0"``;
    * ``args.output_json`` contains the substring ``"eager"``;
    * ``args.result_filename`` contains the substring ``"eager"``.

    Args:
        args: Parsed command-line arguments of the benchmark.
        extra_info: Extra metadata passed through to ``extract_field``.

    Returns:
        ``False`` if one of the conditions above matches, ``True`` otherwise.
    """
    if extract_field(args, extra_info, "compilation_config.mode") == "0":
        return False

    # The attribute can exist with a value of None (for instance an optional
    # argument that was not supplied). ``"eager" in None`` would raise
    # TypeError, so normalise to a string before the substring check.
    output_json = str(getattr(args, "output_json", None) or "")
    result_filename = str(getattr(args, "result_filename", None) or "")

    return "eager" not in output_json and "eager" not in result_filename
def
convert_to_pytorch_benchmark_format
(
def
convert_to_pytorch_benchmark_format
(
args
:
argparse
.
Namespace
,
metrics
:
dict
[
str
,
list
],
extra_info
:
dict
[
str
,
Any
]
args
:
argparse
.
Namespace
,
metrics
:
dict
[
str
,
list
],
extra_info
:
dict
[
str
,
Any
]
)
->
list
:
)
->
list
:
...
@@ -26,6 +52,14 @@ def convert_to_pytorch_benchmark_format(
...
@@ -26,6 +52,14 @@ def convert_to_pytorch_benchmark_format(
"name"
:
"vLLM benchmark"
,
"name"
:
"vLLM benchmark"
,
"extra_info"
:
{
"extra_info"
:
{
"args"
:
vars
(
args
),
"args"
:
vars
(
args
),
"compilation_config.mode"
:
extract_field
(
args
,
extra_info
,
"compilation_config.mode"
),
"optimization_level"
:
extract_field
(
args
,
extra_info
,
"optimization_level"
),
# A boolean field used by vLLM benchmark HUD dashboard
"use_compile"
:
use_compile
(
args
,
extra_info
),
},
},
},
},
"model"
:
{
"model"
:
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment