Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
66c508b1
Unverified
Commit
66c508b1
authored
Jun 06, 2025
by
QiliangCui
Committed by
GitHub
Jun 06, 2025
Browse files
[TPU][Test] Add script to run benchmark on TPU for buildkite (#19039)
Signed-off-by:
Qiliang Cui
<
derrhein@gmail.com
>
parent
84166fee
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
234 additions
and
0 deletions
+234
-0
.buildkite/scripts/tpu/cleanup_docker.sh
.buildkite/scripts/tpu/cleanup_docker.sh
+24
-0
.buildkite/scripts/tpu/config_v6e_1.env
.buildkite/scripts/tpu/config_v6e_1.env
+14
-0
.buildkite/scripts/tpu/docker_run_bm.sh
.buildkite/scripts/tpu/docker_run_bm.sh
+102
-0
.buildkite/scripts/tpu/run_bm.sh
.buildkite/scripts/tpu/run_bm.sh
+94
-0
No files found.
.buildkite/scripts/tpu/cleanup_docker.sh
0 → 100755
View file @
66c508b1
#!/bin/bash
#
# Free Docker disk space when the filesystem that holds Docker's root
# directory is more than ${threshold}% full.
#
# Requires a reachable Docker daemon (`docker info` must succeed).
# Exits non-zero if the Docker root directory or its disk usage cannot be
# determined, or if any prune step fails.

set -euo pipefail

# Usage percentage above which cleanup is triggered.
readonly threshold=70

docker_root=$(docker info -f '{{.DockerRootDir}}')
if [[ -z "$docker_root" ]]; then
  echo "Failed to determine Docker root directory."
  exit 1
fi
echo "Docker root directory: $docker_root"

# Check disk usage of the filesystem where Docker's root directory is located.
# -P forces the POSIX one-line-per-filesystem layout; without it a long device
# name can wrap onto its own line and shift the columns.
disk_usage=$(df -P "$docker_root" | awk 'NR == 2 { sub(/%/, "", $5); print $5 }')
if [[ ! "$disk_usage" =~ ^[0-9]+$ ]]; then
  echo "Failed to determine disk usage for $docker_root (got '$disk_usage')." >&2
  exit 1
fi

if [[ "$disk_usage" -gt "$threshold" ]]; then
  echo "Disk usage is above $threshold%. Cleaning up Docker images and volumes..."
  # Remove dangling images (those that are not tagged and not used by any container)
  docker image prune -f
  # Remove unused volumes, then force the system prune for old images as well.
  # These are separate statements on purpose: in `a && b`, a failure of `a` is
  # ignored by `set -e`, which would let the script report success.
  docker volume prune -f
  docker system prune --force --filter "until=72h" --all
  echo "Docker images and volumes cleanup completed."
else
  echo "Disk usage is below $threshold%. No cleanup needed."
fi
.buildkite/scripts/tpu/config_v6e_1.env
0 → 100644
View file @
66c508b1
# Environment config
# This file is both sourced by docker_run_bm.sh and passed to
# `docker run --env-file`, so keep every entry a plain KEY=VALUE line and put
# comments on their own lines.
# TEST_NAME prefixes the log files copied back to the host
# (<TEST_NAME>_vllm_log.txt, <TEST_NAME>_bm_log.txt).
TEST_NAME=llama8b
# Name given to the benchmark container (`docker run --name`).
CONTAINER_NAME=vllm-tpu
# vllm config
# Model passed to `vllm serve` and to benchmark_serving.py --model.
MODEL=meta-llama/Llama-3.1-8B-Instruct
# Passed to `vllm serve` as --max-num-seqs.
MAX_NUM_SEQS=512
# Passed to `vllm serve` as --max-num-batched-tokens.
MAX_NUM_BATCHED_TOKENS=512
# Passed to `vllm serve` as --tensor-parallel-size.
TENSOR_PARALLEL_SIZE=1
# Passed to `vllm serve` as --max-model-len.
MAX_MODEL_LEN=2048
# Must already exist on the host; bind-mounted into the container at the same
# path and passed to `vllm serve` as --download_dir.
DOWNLOAD_DIR=/mnt/disks/persist
# Minimum acceptable "Request throughput (req/s)"; docker_run_bm.sh fails the
# run when the measured value is lower.
EXPECTED_THROUGHPUT=8.0
# Passed to benchmark_serving.py as --sonnet-input-len.
INPUT_LEN=1800
# Passed to benchmark_serving.py as --sonnet-output-len.
OUTPUT_LEN=128
.buildkite/scripts/tpu/docker_run_bm.sh
0 → 100755
View file @
66c508b1
#!/bin/bash
#
# Build the vLLM TPU benchmark image, run the benchmark inside a container,
# copy the logs back to the host and fail when the measured request throughput
# is below EXPECTED_THROUGHPUT.
#
# Usage: docker_run_bm.sh <env-file>
#   <env-file> is sourced here and also passed to `docker run --env-file`.
#   This script reads TEST_NAME, CONTAINER_NAME, MODEL, DOWNLOAD_DIR and
#   EXPECTED_THROUGHPUT from it.
#
# Environment:
#   HF_TOKEN          required; forwarded into the container.
#   BUILDKITE_COMMIT  optional; forwarded into the container as TARGET_COMMIT.
#   BUILDKITE         when "true", both logs are uploaded as Buildkite artifacts.
#
# `set -e` is deliberately not enabled: after a failed benchmark run the logs
# must still be copied out and uploaded. Steps that must not fail are checked
# explicitly instead.

if [[ ! -f "${1:-}" ]]; then
  echo "Error: The env file '${1:-}' does not exist."
  exit 1  # Exit the script with a non-zero status to indicate an error
fi

ENV_FILE=$1

# For testing on local vm, use `set -a` to export all variables
source /etc/environment
source "$ENV_FILE"

# Force-remove any benchmark container left behind. Best effort: a missing
# container is not an error, hence `|| true`.
remove_docker_container() {
  docker rm -f tpu-test || true
  docker rm -f vllm-tpu || true
  docker rm -f "$CONTAINER_NAME" || true
}

trap remove_docker_container EXIT

# Remove the container that might not be cleaned up in the previous run.
remove_docker_container

# Build docker image.
# TODO: build the image outside the script and share the image with other
# tpu test if building time is too long.
if ! DOCKER_BUILDKIT=1 docker build \
  --build-arg max_jobs=16 \
  --build-arg USE_SCCACHE=1 \
  --build-arg GIT_REPO_CHECK=0 \
  --tag vllm/vllm-tpu-bm \
  --progress plain -f docker/Dockerfile.tpu .; then
  echo "Error: docker build failed."
  exit 1
fi

# Host directory that receives the logs copied out of the container.
if ! LOG_ROOT=$(mktemp -d); then
  echo "Error: failed to create a temporary log directory."
  exit 1
fi
echo "Results will be stored in: $LOG_ROOT"

if [[ -z "${HF_TOKEN:-}" ]]; then
  echo "Error: HF_TOKEN is not set or is empty."
  exit 1
fi

# Make sure mounted disk or dir exists
if [[ ! -d "$DOWNLOAD_DIR" ]]; then
  echo "Error: Folder $DOWNLOAD_DIR does not exist. This is useually a mounted drive. If no mounted drive, just create a folder."
  exit 1
fi

echo "Run model $MODEL"
echo

echo "starting docker...$CONTAINER_NAME"
echo
# The container idles on `tail -f /dev/null`; the benchmark is started below
# with `docker exec` so the logs can be copied out afterwards.
if ! docker run \
  -v "$DOWNLOAD_DIR:$DOWNLOAD_DIR" \
  --env-file "$ENV_FILE" \
  -e HF_TOKEN="$HF_TOKEN" \
  -e TARGET_COMMIT="${BUILDKITE_COMMIT:-}" \
  -e MODEL="$MODEL" \
  -e WORKSPACE=/workspace \
  --name "$CONTAINER_NAME" \
  -d \
  --privileged \
  --network host \
  -v /dev/shm:/dev/shm \
  vllm/vllm-tpu-bm tail -f /dev/null; then
  echo "Error: failed to start container $CONTAINER_NAME."
  exit 1
fi

echo "run script..."
echo
# run_bm.sh lives next to this script under .buildkite/scripts/tpu/. The path
# is resolved relative to the container's working directory (presumably the
# vLLM checkout set up by the image -- confirm against docker/Dockerfile.tpu).
# The exit status is remembered rather than acted on immediately so the logs
# are still collected after a failed run.
bm_status=0
docker exec "$CONTAINER_NAME" /bin/bash -c ".buildkite/scripts/tpu/run_bm.sh" \
  || bm_status=$?

echo "copy result back..."
VLLM_LOG="${LOG_ROOT}/${TEST_NAME}_vllm_log.txt"
BM_LOG="${LOG_ROOT}/${TEST_NAME}_bm_log.txt"
docker cp "$CONTAINER_NAME:/workspace/vllm_log.txt" "$VLLM_LOG"
docker cp "$CONTAINER_NAME:/workspace/bm_log.txt" "$BM_LOG"

# Keep only digits and dots from the matching line, e.g. "8.25".
throughput=$(grep "Request throughput (req/s):" "$BM_LOG" | sed 's/[^0-9.]//g')
echo "throughput for $TEST_NAME at ${BUILDKITE_COMMIT:-}: $throughput"

if [[ "${BUILDKITE:-}" = "true" ]]; then
  echo "Running inside Buildkite"
  buildkite-agent artifact upload "$VLLM_LOG"
  buildkite-agent artifact upload "$BM_LOG"
else
  echo "Not running inside Buildkite"
fi

if (( bm_status != 0 )); then
  echo "Error: benchmark script exited with status $bm_status"
  exit 1
fi

#
# compare the throughput with EXPECTED_THROUGHPUT
# and assert meeting the expectation
#
number_re='^[0-9]+([.][0-9]+)?$'
if [[ -z "$throughput" || ! "$throughput" =~ $number_re ]]; then
  echo "Failed to get the throughput"
  exit 1
fi

if [[ ! "${EXPECTED_THROUGHPUT:-}" =~ $number_re ]]; then
  echo "Error: EXPECTED_THROUGHPUT('${EXPECTED_THROUGHPUT:-}') is not a number"
  exit 1
fi

# Floating-point comparison via awk (bash arithmetic is integer-only). awk
# exits 0 exactly when throughput < expected. Unlike an `(( $(... | bc) ))`
# construct, a missing tool cannot turn this into a silent pass.
if awk -v actual="$throughput" -v expected="$EXPECTED_THROUGHPUT" \
  'BEGIN { exit !((actual + 0) < (expected + 0)) }'; then
  echo "Error: throughput($throughput) is less than expected($EXPECTED_THROUGHPUT)"
  exit 1
fi
.buildkite/scripts/tpu/run_bm.sh
0 → 100755
View file @
66c508b1
#!/bin/bash
#
# Runs inside the benchmark container: start `vllm serve` in the background,
# wait for it to come up, run benchmarks/benchmark_serving.py against it with
# the sonnet dataset and print the measured request throughput.
#
# Required environment:
#   WORKSPACE               directory that receives vllm_log.txt / bm_log.txt
#   MODEL, MAX_NUM_SEQS, MAX_NUM_BATCHED_TOKENS, TENSOR_PARALLEL_SIZE,
#   DOWNLOAD_DIR, MAX_MODEL_LEN   forwarded to `vllm serve`
#   INPUT_LEN, OUTPUT_LEN   forwarded to benchmark_serving.py
# Optional environment:
#   TARGET_COMMIT           when non-empty, must equal `git rev-parse HEAD`
#
# Paths under benchmarks/ are relative, so the script must be started from the
# directory that contains benchmarks/.

set -euo pipefail

VLLM_LOG="$WORKSPACE/vllm_log.txt"
BM_LOG="$WORKSPACE/bm_log.txt"

# Guard against benchmarking a different commit than the one requested.
# The :- default keeps `set -u` from aborting when TARGET_COMMIT is unset.
if [[ -n "${TARGET_COMMIT:-}" ]]; then
  head_hash=$(git rev-parse HEAD)
  if [[ "$TARGET_COMMIT" != "$head_hash" ]]; then
    echo "Error: target commit $TARGET_COMMIT does not match HEAD: $head_hash"
    exit 1
  fi
fi

echo "model: $MODEL"
echo

#
# create a log folder
#
# -p: do not abort (set -e) when the folder already exists.
mkdir -p "$WORKSPACE/log"

# TODO: Move to image building.
pip install pandas
pip install datasets

#
# create sonnet_4x
#
# The file starts with one empty line followed by four copies of sonnet.txt.
echo "Create sonnet_4x.txt"
echo "" > benchmarks/sonnet_4x.txt
for _ in {1..4}; do
  cat benchmarks/sonnet.txt >> benchmarks/sonnet_4x.txt
done

#
# start vllm service in backend
#
echo "lanching vllm..."
echo "logging to $VLLM_LOG"
echo

VLLM_USE_V1=1 vllm serve "$MODEL" \
  --seed 42 \
  --disable-log-requests \
  --max-num-seqs "$MAX_NUM_SEQS" \
  --max-num-batched-tokens "$MAX_NUM_BATCHED_TOKENS" \
  --tensor-parallel-size "$TENSOR_PARALLEL_SIZE" \
  --no-enable-prefix-caching \
  --download_dir "$DOWNLOAD_DIR" \
  --max-model-len "$MAX_MODEL_LEN" > "$VLLM_LOG" 2>&1 &
server_pid=$!

echo "wait for 20 minutes.."
echo
# Poll the server log every 10 seconds, up to 120 times (20 minutes in total).
server_ready=0
for _ in {1..120}; do
  # TODO: detect other type of errors.
  if grep -Fq "raise RuntimeError" "$VLLM_LOG"; then
    echo "Detected RuntimeError, exiting."
    exit 1
  elif grep -Fq "Application startup complete" "$VLLM_LOG"; then
    echo "Application started"
    server_ready=1
    break
  elif ! kill -0 "$server_pid" 2>/dev/null; then
    # The server process is gone without reporting startup; waiting is useless.
    echo "vllm server exited before startup completed, see $VLLM_LOG"
    exit 1
  else
    echo "wait for 10 seconds..."
    sleep 10
  fi
done

# Without this check a timed-out wait would fall through and benchmark a
# server that never came up.
if (( server_ready == 0 )); then
  echo "Timed out waiting for vllm server to start, see $VLLM_LOG"
  exit 1
fi

#
# run test
#
echo "run benchmark test..."
echo "logging to $BM_LOG"
echo
python benchmarks/benchmark_serving.py \
  --backend vllm \
  --model "$MODEL" \
  --dataset-name sonnet \
  --dataset-path benchmarks/sonnet_4x.txt \
  --sonnet-input-len "$INPUT_LEN" \
  --sonnet-output-len "$OUTPUT_LEN" \
  --ignore-eos > "$BM_LOG"

echo "completed..."
echo

# Keep only digits and dots from the matching line. Under pipefail a missing
# line fails the pipeline, so report it instead of exiting silently.
if ! throughput=$(grep "Request throughput (req/s):" "$BM_LOG" | sed 's/[^0-9.]//g'); then
  echo "Failed to find the request throughput in $BM_LOG"
  exit 1
fi
echo "throughput: $throughput"
echo
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment