Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
392c5af4
Unverified
Commit
392c5af4
authored
Jan 28, 2026
by
Bin Bao
Committed by
GitHub
Jan 28, 2026
Browse files
[Benchmark] Add startup benchmarking to buildkite run (#33183)
Signed-off-by:
Bin Bao
<
binbao@meta.com
>
parent
af9b69f9
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
35 additions
and
88 deletions
+35
-88
.buildkite/performance-benchmarks/scripts/run-performance-benchmarks.sh
...formance-benchmarks/scripts/run-performance-benchmarks.sh
+35
-88
No files found.
.buildkite/performance-benchmarks/scripts/run-performance-benchmarks.sh
View file @
392c5af4
...
@@ -25,9 +25,9 @@ check_gpus() {
...
@@ -25,9 +25,9 @@ check_gpus() {
echo
"Need at least 1 GPU to run benchmarking."
echo
"Need at least 1 GPU to run benchmarking."
exit
1
exit
1
fi
fi
declare
-g
arch_suffix
=
''
declare
-g
arch_suffix
=
''
if
command
-v
nvidia-smi
;
then
if
command
-v
nvidia-smi
;
then
declare
-g
gpu_type
=
$(
nvidia-smi
--query-gpu
=
name
--format
=
csv,noheader |
awk
'{print $2}'
)
declare
-g
gpu_type
=
$(
nvidia-smi
--query-gpu
=
name
--format
=
csv,noheader |
awk
'{print $2}'
)
elif
command
-v
amd-smi
;
then
elif
command
-v
amd-smi
;
then
...
@@ -181,19 +181,20 @@ upload_to_buildkite() {
...
@@ -181,19 +181,20 @@ upload_to_buildkite() {
$BUILDKITE_AGENT_COMMAND
artifact upload
"
$RESULTS_FOLDER
/*"
$BUILDKITE_AGENT_COMMAND
artifact upload
"
$RESULTS_FOLDER
/*"
}
}
run_latency_tests
()
{
run_benchmark_tests
()
{
# run latency tests using `vllm bench latency` command
# run benchmark tests using `vllm bench <test_type>` command
# $1: a json file specifying latency test cases
# $1: test type (latency or throughput)
# $2: a json file specifying test cases
local
latency_test_file
local
test_type
=
$1
l
atency_
test_file
=
$
1
l
ocal
test_file
=
$
2
# Iterate over
latency
tests
# Iterate over tests
jq
-c
'.[]'
"
$
latency_
test_file
"
|
while
read
-r
params
;
do
jq
-c
'.[]'
"
$test_file
"
|
while
read
-r
params
;
do
# get the test name, and append the GPU type back to it.
# get the test name, and append the GPU type back to it.
test_name
=
$(
echo
"
$params
"
| jq
-r
'.test_name'
)
test_name
=
$(
echo
"
$params
"
| jq
-r
'.test_name'
)
if
[[
!
"
$test_name
"
=
~ ^
latency
_
]]
;
then
if
[[
!
"
$test_name
"
=
~ ^
${
test_type
}
_
]]
;
then
echo
"In
latency
-test.json, test_name must start with
\"
latency
_
\"
."
echo
"In
${
test_type
}
-test.json, test_name must start with
\"
${
test_type
}
_
\"
."
exit
1
exit
1
fi
fi
...
@@ -204,15 +205,15 @@ run_latency_tests() {
...
@@ -204,15 +205,15 @@ run_latency_tests() {
fi
fi
# get arguments
# get arguments
lat
enc
y
_params
=
$(
echo
"
$params
"
| jq
-r
'.parameters'
)
b
enc
h
_params
=
$(
echo
"
$params
"
| jq
-r
'.parameters'
)
lat
enc
y
_args
=
$(
json2args
"
$
lat
enc
y
_params
"
)
b
enc
h
_args
=
$(
json2args
"
$
b
enc
h
_params
"
)
lat
enc
y
_environment_variables
=
$(
echo
"
$params
"
| jq
-r
'.environment_variables'
)
b
enc
h
_environment_variables
=
$(
echo
"
$params
"
| jq
-r
'.environment_variables'
)
lat
enc
y
_envs
=
$(
json2envs
"
$
lat
enc
y
_environment_variables
"
)
b
enc
h
_envs
=
$(
json2envs
"
$
b
enc
h
_environment_variables
"
)
# check if there is enough GPU to run the test
# check if there is enough GPU to run the test
tp
=
$(
echo
"
$
lat
enc
y
_params
"
| jq
-r
'.tensor_parallel_size'
)
tp
=
$(
echo
"
$
b
enc
h
_params
"
| jq
-r
'.tensor_parallel_size'
)
if
[[
"
$ON_CPU
"
==
"1"
]]
;
then
if
[[
"
$ON_CPU
"
==
"1"
]]
;
then
pp
=
$(
echo
"
$
lat
enc
y
_params
"
| jq
-r
'.pipeline_parallel_size // 1'
)
pp
=
$(
echo
"
$
b
enc
h
_params
"
| jq
-r
'.pipeline_parallel_size // 1'
)
world_size
=
$((
$tp
*
$pp
))
world_size
=
$((
$tp
*
$pp
))
if
[[
$numa_count
-lt
$world_size
&&
-z
"
${
REMOTE_HOST
}
"
]]
;
then
if
[[
$numa_count
-lt
$world_size
&&
-z
"
${
REMOTE_HOST
}
"
]]
;
then
echo
"Required world-size
$world_size
but only
$numa_count
NUMA nodes found. Skip testcase
$test_name
."
echo
"Required world-size
$world_size
but only
$numa_count
NUMA nodes found. Skip testcase
$test_name
."
...
@@ -225,97 +226,42 @@ run_latency_tests() {
...
@@ -225,97 +226,42 @@ run_latency_tests() {
fi
fi
fi
fi
lat
enc
y
_command
=
"
$
lat
enc
y
_envs
vllm bench
latency
\
b
enc
h
_command
=
"
$
b
enc
h
_envs
vllm bench
$test_type
\
--output-json
$RESULTS_FOLDER
/
${
test_name
}
.json
\
--output-json
$RESULTS_FOLDER
/
${
test_name
}
.json
\
$
lat
enc
y
_args
"
$
b
enc
h
_args
"
echo
"Running test case
$test_name
"
echo
"Running test case
$test_name
"
echo
"
Latency
command:
$
lat
enc
y
_command
"
echo
"
${
test_type
^
}
command:
$
b
enc
h
_command
"
# recoding benchmarking command an
g
GPU command
# reco
r
ding benchmarking command an
d
GPU command
jq_output
=
$(
jq
-n
\
jq_output
=
$(
jq
-n
\
--arg
latency
"
$lat
enc
y
_command
"
\
--arg
command
"
$b
enc
h
_command
"
\
--arg
gpu
"
$gpu_type
"
\
--arg
gpu
"
$gpu_type
"
\
--arg
test_type
"
$test_type
"
\
'{
'{
latency
_command: $
latency
,
($test_type + "
_command
")
: $
command
,
gpu_type: $gpu
gpu_type: $gpu
}'
)
}'
)
echo
"
$jq_output
"
>
"
$RESULTS_FOLDER
/
$test_name
.commands"
echo
"
$jq_output
"
>
"
$RESULTS_FOLDER
/
$test_name
.commands"
# run the benchmark
# run the benchmark
eval
"
$
lat
enc
y
_command
"
eval
"
$
b
enc
h
_command
"
kill_gpu_processes
kill_gpu_processes
done
done
}
}
run_throughput_tests
()
{
run_latency_tests
()
{
# run throughput tests using `vllm bench throughput`
run_benchmark_tests
"latency"
"
$1
"
# $1: a json file specifying throughput test cases
}
local
throughput_test_file
throughput_test_file
=
$1
# Iterate over throughput tests
jq
-c
'.[]'
"
$throughput_test_file
"
|
while
read
-r
params
;
do
# get the test name, and append the GPU type back to it.
test_name
=
$(
echo
"
$params
"
| jq
-r
'.test_name'
)
if
[[
!
"
$test_name
"
=
~ ^throughput_
]]
;
then
echo
"In throughput-test.json, test_name must start with
\"
throughput_
\"
."
exit
1
fi
# if TEST_SELECTOR is set, only run the test cases that match the selector
if
[[
-n
"
$TEST_SELECTOR
"
]]
&&
[[
!
"
$test_name
"
=
~
$TEST_SELECTOR
]]
;
then
echo
"Skip test case
$test_name
."
continue
fi
# get arguments
throughput_params
=
$(
echo
"
$params
"
| jq
-r
'.parameters'
)
throughput_args
=
$(
json2args
"
$throughput_params
"
)
throughput_environment_variables
=
$(
echo
"
$params
"
| jq
-r
'.environment_variables'
)
throughput_envs
=
$(
json2envs
"
$throughput_environment_variables
"
)
# check if there is enough GPU to run the test
tp
=
$(
echo
"
$throughput_params
"
| jq
-r
'.tensor_parallel_size'
)
if
[[
"
$ON_CPU
"
==
"1"
]]
;
then
pp
=
$(
echo
"
$throughput_params
"
| jq
-r
'.pipeline_parallel_size // 1'
)
world_size
=
$((
$tp
*
$pp
))
if
[[
$numa_count
-lt
$world_size
&&
-z
"
${
REMOTE_HOST
}
"
]]
;
then
echo
"Required world-size
$world_size
but only
$numa_count
NUMA nodes found. Skip testcase
$test_name
."
continue
fi
else
if
[[
$gpu_count
-lt
$tp
]]
;
then
echo
"Required tensor-parallel-size
$tp
but only
$gpu_count
GPU found. Skip testcase
$test_name
."
continue
fi
fi
throughput_command
=
"
$throughput_envs
vllm bench throughput
\
--output-json
$RESULTS_FOLDER
/
${
test_name
}
.json
\
$throughput_args
"
echo
"Running test case
$test_name
"
echo
"Throughput command:
$throughput_command
"
# recoding benchmarking command ang GPU command
jq_output
=
$(
jq
-n
\
--arg
command
"
$throughput_command
"
\
--arg
gpu
"
$gpu_type
"
\
'{
throughput_command: $command,
gpu_type: $gpu
}'
)
echo
"
$jq_output
"
>
"
$RESULTS_FOLDER
/
$test_name
.commands"
# run the benchmark
eval
"
$throughput_command
"
kill_gpu_processes
run_startup_tests
()
{
run_benchmark_tests
"startup"
"
$1
"
}
done
run_throughput_tests
()
{
run_benchmark_tests
"throughput"
"
$1
"
}
}
run_serving_tests
()
{
run_serving_tests
()
{
...
@@ -534,6 +480,7 @@ main() {
...
@@ -534,6 +480,7 @@ main() {
# benchmarking
# benchmarking
run_serving_tests
$QUICK_BENCHMARK_ROOT
/tests/
"
${
SERVING_JSON
:-
serving
-tests
$ARCH
.json
}
"
run_serving_tests
$QUICK_BENCHMARK_ROOT
/tests/
"
${
SERVING_JSON
:-
serving
-tests
$ARCH
.json
}
"
run_latency_tests
$QUICK_BENCHMARK_ROOT
/tests/
"
${
LATENCY_JSON
:-
latency
-tests
$ARCH
.json
}
"
run_latency_tests
$QUICK_BENCHMARK_ROOT
/tests/
"
${
LATENCY_JSON
:-
latency
-tests
$ARCH
.json
}
"
run_startup_tests
$QUICK_BENCHMARK_ROOT
/tests/
"
${
STARTUP_JSON
:-
startup
-tests
$ARCH
.json
}
"
run_throughput_tests
$QUICK_BENCHMARK_ROOT
/tests/
"
${
THROUGHPUT_JSON
:-
throughput
-tests
$ARCH
.json
}
"
run_throughput_tests
$QUICK_BENCHMARK_ROOT
/tests/
"
${
THROUGHPUT_JSON
:-
throughput
-tests
$ARCH
.json
}
"
# postprocess benchmarking results
# postprocess benchmarking results
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment