#!/bin/bash
# This script is run by Buildkite: it runs the benchmarks and uploads the results to Buildkite.

set -ex
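# make a pipeline fail when any stage fails, so `python3 ... | tee log` reflects
# the benchmark's exit status rather than tee's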
set -o pipefail

# cd into parent directory of this file
cd "$(dirname "${BASH_SOURCE[0]}")/.."

# minimal CI images may lack wget and curl; install them if either is missing
(which wget && which curl) || (apt-get update && apt-get install -y wget curl)

# run python-based benchmarks and upload the result to buildkite
# under `set -e` a failing pipeline aborts the script before `$?` can be read,
# so capture the benchmark's exit code with `||` instead
bench_latency_exit_code=0
python3 benchmarks/benchmark_latency.py 2>&1 | tee benchmark_latency.txt || bench_latency_exit_code=$?

bench_throughput_exit_code=0
python3 benchmarks/benchmark_throughput.py --input-len 256 --output-len 256 2>&1 | tee benchmark_throughput.txt || bench_throughput_exit_code=$?

# run server-based benchmarks and upload the result to buildkite
python3 -m vllm.entrypoints.openai.api_server --model meta-llama/Llama-2-7b-chat-hf &
server_pid=$!
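# optional hardening (an addition, not in the original flow): kill the server on
# script exit so it cannot outlive this build step if a later command bails out early
trap 'kill $server_pid 2> /dev/null || true' EXIT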
# download the ShareGPT dataset used as serving-benchmark input while the server boots
wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json

# wait for server to start, timeout after 600 seconds
timeout 600 bash -c 'until curl localhost:8000/v1/models; do sleep 1; done' || exit 1
bench_serving_exit_code=0
python3 benchmarks/benchmark_serving.py \
    --backend openai \
    --dataset ./ShareGPT_V3_unfiltered_cleaned_split.json \
    --model meta-llama/Llama-2-7b-chat-hf \
    --num-prompts 20 \
    --endpoint /v1/completions \
    --tokenizer meta-llama/Llama-2-7b-chat-hf \
    --save-result \
    2>&1 | tee benchmark_serving.txt || bench_serving_exit_code=$?
kill "$server_pid"
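# best-effort reaping of the server process; `|| true` keeps the expected
# non-zero status of a killed child from tripping `set -e`
wait "$server_pid" 2> /dev/null || true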

# write the results into a markdown file
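# (assumption about the benchmark scripts' output: the first line echoes the run
# configuration and the last line carries the summary metric, hence sed 1p / $p)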
echo "### Latency Benchmarks" >> benchmark_results.md
sed -n '1p' benchmark_latency.txt >> benchmark_results.md # first line
echo "" >> benchmark_results.md
sed -n '$p' benchmark_latency.txt >> benchmark_results.md # last line

echo "### Throughput Benchmarks" >> benchmark_results.md
sed -n '1p' benchmark_throughput.txt >> benchmark_results.md # first line
echo "" >> benchmark_results.md
sed -n '$p' benchmark_throughput.txt >> benchmark_results.md # last line

echo "### Serving Benchmarks" >> benchmark_results.md
sed -n '1p' benchmark_serving.txt >> benchmark_results.md # first line
echo "" >> benchmark_results.md
tail -n 13 benchmark_serving.txt >> benchmark_results.md # last 13 lines

# upload the results to buildkite
/workspace/buildkite-agent annotate --style "info" --context "benchmark-results" < benchmark_results.md

# exit with the exit code of the benchmarks
if [ "$bench_latency_exit_code" -ne 0 ]; then
    exit "$bench_latency_exit_code"
fi

if [ "$bench_throughput_exit_code" -ne 0 ]; then
    exit "$bench_throughput_exit_code"
fi

if [ "$bench_serving_exit_code" -ne 0 ]; then
    exit "$bench_serving_exit_code"
fi

# upload the per-run result files written by benchmark_serving.py --save-result;
# the glob is quoted so the buildkite agent expands it itself
/workspace/buildkite-agent artifact upload "openai-*.json"