Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
fead53ba
Unverified
Commit
fead53ba
authored
Jan 17, 2025
by
Kunshang Ji
Committed by
GitHub
Jan 17, 2025
Browse files
[CI]add genai-perf benchmark in nightly benchmark (#10704)
Signed-off-by:
Kunshang Ji
<
kunshang.ji@intel.com
>
parent
ebc73f28
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
196 additions
and
4 deletions
+196
-4
.buildkite/nightly-benchmarks/scripts/run-nightly-benchmarks.sh
...kite/nightly-benchmarks/scripts/run-nightly-benchmarks.sh
+107
-0
.buildkite/nightly-benchmarks/tests/genai-perf-tests.json
.buildkite/nightly-benchmarks/tests/genai-perf-tests.json
+23
-0
requirements-test.in
requirements-test.in
+3
-0
requirements-test.txt
requirements-test.txt
+63
-4
No files found.
.buildkite/nightly-benchmarks/scripts/run-nightly-benchmarks.sh
View file @
fead53ba
...
...
@@ -301,6 +301,104 @@ run_serving_tests() {
kill_gpu_processes
}
run_genai_perf_tests() {
  # Run the genai-perf benchmark cases described in a JSON file.
  #
  # Arguments:
  #   $1: a json file specifying genai-perf test cases. It must hold a
  #       top-level array; each element is read for .test_name, .qps_list,
  #       .common_parameters and .<engine>_server_parameters.
  # Globals (read):
  #   CURRENT_LLM_SERVING_ENGINE  engine prefix for test names / server params
  #   TEST_SELECTOR               optional regex; non-matching cases are skipped
  #   gpu_count                   number of GPUs available on this machine
  #   VLLM_SOURCE_CODE_LOC        checkout root that contains launch-server.sh
  # Depends on kill_gpu_processes and wait_for_server, which are defined
  # elsewhere in this script.

  local genai_perf_test_file
  genai_perf_test_file=$1

  local params test_name common_params server_params
  local model tp dataset_name dataset_path port num_prompts reuse_server
  local qps_list qps new_test_name backend
  local -a client_command

  # Iterate over genai-perf tests.
  # The test cases are read from fd 3 while the loop body gets /dev/null as
  # stdin (see the redirections after `done`). Otherwise any command in the
  # body that reads stdin (server launcher, genai-perf, ...) would consume
  # the remaining test cases and silently end the loop early.
  jq -c '.[]' "$genai_perf_test_file" | while read -r params <&3; do
    # get the test name
    test_name=$(jq -r '.test_name' <<<"$params")

    # if TEST_SELECTOR is set, only run the test cases that match the selector
    if [[ -n "$TEST_SELECTOR" ]] && [[ ! "$test_name" =~ $TEST_SELECTOR ]]; then
      echo "Skip test case $test_name."
      continue
    fi

    # prepend the current serving engine to the test name
    test_name=${CURRENT_LLM_SERVING_ENGINE}_${test_name}

    # get common parameters
    common_params=$(jq -r '.common_parameters' <<<"$params")
    model=$(jq -r '.model' <<<"$common_params")
    tp=$(jq -r '.tp' <<<"$common_params")
    dataset_name=$(jq -r '.dataset_name' <<<"$common_params")
    dataset_path=$(jq -r '.dataset_path' <<<"$common_params")
    port=$(jq -r '.port' <<<"$common_params")
    num_prompts=$(jq -r '.num_prompts' <<<"$common_params")
    reuse_server=$(jq -r '.reuse_server' <<<"$common_params")

    # get client and server arguments
    server_params=$(jq -r ".${CURRENT_LLM_SERVING_ENGINE}_server_parameters" <<<"$params")
    # one entry per line; @sh leaves numbers bare and single-quotes strings
    qps_list=$(jq -r '.qps_list | .[] | @sh' <<<"$params")
    echo "Running over qps list $qps_list"

    # check if there is enough GPU to run the test
    if [[ $gpu_count -lt $tp ]]; then
      echo "Required num-shard $tp but only $gpu_count GPU found. Skip testcase $test_name."
      continue
    fi

    if [[ $reuse_server == "true" ]]; then
      echo "Reuse previous server for test case $test_name"
    else
      kill_gpu_processes
      bash "$VLLM_SOURCE_CODE_LOC/.buildkite/nightly-benchmarks/scripts/launch-server.sh" \
        "$server_params" "$common_params"
    fi

    if wait_for_server; then
      echo ""
      echo "$CURRENT_LLM_SERVING_ENGINE server is up and running."
    else
      echo ""
      echo "$CURRENT_LLM_SERVING_ENGINE failed to start within the timeout period."
      # a server that never came up cannot serve later cases either
      break
    fi

    # iterate over different QPS
    # shellcheck disable=SC2086 # word splitting of the jq output is intended
    for qps in $qps_list; do
      # remove the surrounding single quote from qps (added by jq's @sh for
      # string entries such as "inf")
      qps=${qps//\'/}

      if [[ "$qps" == *"inf"* ]]; then
        # "inf" is replaced by the number of prompts to obtain a finite
        # request rate for genai-perf
        echo "qps was $qps"
        qps=$num_prompts
        echo "now qps is $qps"
      fi

      new_test_name="${test_name}_qps_${qps}"
      backend=$CURRENT_LLM_SERVING_ENGINE

      if [[ "$backend" == *"vllm"* ]]; then
        backend="vllm"
      fi
      # NOTE(review): $backend and $new_test_name are computed but not yet
      # consumed; the client below always passes "--backend vllm".

      #TODO: add output dir.
      # Build the command as an argv array so values are passed verbatim,
      # without a second round of shell parsing through eval.
      client_command=(
        genai-perf profile
        -m "$model"
        --service-kind openai
        --backend vllm
        --endpoint-type chat
        --streaming
        --url "localhost:$port"
        --request-rate "$qps"
        --num-prompts "$num_prompts"
      )

      echo "Client command: ${client_command[*]}"

      "${client_command[@]}"

      #TODO: process/record outputs
    done
  done 3<&0 </dev/null # fd 3 <- jq pipe, then stdin <- /dev/null (in that order)

  kill_gpu_processes
}
prepare_dataset
()
{
...
...
@@ -328,12 +426,17 @@ main() {
pip
install
-U
transformers
pip
install
-r
requirements-dev.txt
which genai-perf
# check storage
df
-h
ensure_installed wget
ensure_installed curl
ensure_installed jq
# genai-perf dependency
ensure_installed libb64-0d
prepare_dataset
...
...
@@ -345,6 +448,10 @@ main() {
# run the test
run_serving_tests
"
$BENCHMARK_ROOT
/tests/nightly-tests.json"
# run genai-perf tests
run_genai_perf_tests
"
$BENCHMARK_ROOT
/tests/genai-perf-tests.json"
mv
artifacts/
$RESULTS_FOLDER
/
# upload benchmark results to buildkite
python3
-m
pip
install
tabulate pandas
python3
"
$BENCHMARK_ROOT
/scripts/summary-nightly-results.py"
...
...
.buildkite/nightly-benchmarks/tests/genai-perf-tests.json
0 → 100644
View file @
fead53ba
[
{
"test_name"
:
"llama8B_tp1_genai_perf"
,
"qps_list"
:
[
4
,
8
,
16
,
32
],
"common_parameters"
:
{
"model"
:
"meta-llama/Meta-Llama-3-8B-Instruct"
,
"tp"
:
1
,
"port"
:
8000
,
"num_prompts"
:
500
,
"reuse_server"
:
false
},
"vllm_server_parameters"
:
{
"disable_log_stats"
:
""
,
"disable_log_requests"
:
""
,
"gpu_memory_utilization"
:
0.9
,
"num_scheduler_steps"
:
10
,
"max_num_seqs"
:
512
,
"dtype"
:
"bfloat16"
},
"genai_perf_input_parameters"
:
{
}
}
]
\ No newline at end of file
requirements-test.in
View file @
fead53ba
...
...
@@ -29,4 +29,7 @@ lm-eval[api]==0.4.4 # required for model evaluation test
bitsandbytes>=0.45.0
buildkite-test-collector==0.1.9
genai_perf==0.0.8
tritonclient==2.51.0
numpy < 2.0.0
requirements-test.txt
View file @
fead53ba
...
...
@@ -37,7 +37,7 @@ audioread==3.0.1
# via librosa
awscli==1.35.23
# via -r requirements-test.in
bitsandbytes
>
=0.45.0
bitsandbytes
=
=0.45.0
# via -r requirements-test.in
black==24.10.0
# via datamodel-code-generator
...
...
@@ -75,6 +75,8 @@ colorama==0.4.6
# tqdm-multiprocess
contourpy==1.3.0
# via matplotlib
cramjam==2.9.0
# via fastparquet
cupy-cuda12x==13.3.0
# via ray
cycler==0.12.1
...
...
@@ -109,6 +111,8 @@ email-validator==2.2.0
# via pydantic
evaluate==0.4.3
# via lm-eval
fastparquet==2024.11.0
# via genai-perf
fastrlock==0.8.2
# via cupy-cuda12x
filelock==3.16.1
...
...
@@ -130,8 +134,11 @@ fsspec[http]==2024.9.0
# via
# datasets
# evaluate
# fastparquet
# huggingface-hub
# torch
genai-perf==0.0.8
# via -r requirements-test.in
genson==1.3.0
# via datamodel-code-generator
h11==0.14.0
...
...
@@ -186,6 +193,8 @@ jsonschema==4.23.0
# ray
jsonschema-specifications==2024.10.1
# via jsonschema
kaleido==0.2.1
# via genai-perf
kiwisolver==1.4.7
# via matplotlib
lazy-loader==0.4
...
...
@@ -200,6 +209,8 @@ lm-eval[api]==0.4.4
# via -r requirements-test.in
lxml==5.3.0
# via sacrebleu
markdown-it-py==3.0.0
# via rich
markupsafe==3.0.2
# via jinja2
matplotlib==3.9.2
...
...
@@ -209,6 +220,8 @@ mbstrdecoder==1.1.3
# dataproperty
# pytablewriter
# typepy
mdurl==0.1.2
# via markdown-it-py
mistral-common[opencv]==1.5.1
# via
# -r requirements-test.in
...
...
@@ -249,6 +262,8 @@ numpy==1.26.4
# datasets
# decord
# evaluate
# fastparquet
# genai-perf
# librosa
# matplotlib
# mistral-common
...
...
@@ -256,15 +271,18 @@ numpy==1.26.4
# numexpr
# opencv-python-headless
# pandas
# patsy
# peft
# rouge-score
# sacrebleu
# scikit-learn
# scipy
# soxr
# statsmodels
# tensorizer
# torchvision
# transformers
# tritonclient
nvidia-cublas-cu12==12.4.5.8
# via
# nvidia-cudnn-cu12
...
...
@@ -306,30 +324,39 @@ packaging==24.1
# datamodel-code-generator
# datasets
# evaluate
# fastparquet
# huggingface-hub
# lazy-loader
# matplotlib
# peft
# plotly
# pooch
# pytest
# pytest-rerunfailures
# ray
# statsmodels
# transformers
# typepy
pandas==2.2.3
# via
# datasets
# evaluate
# fastparquet
# genai-perf
# statsmodels
pathspec==0.12.1
# via black
pathvalidate==3.2.1
# via pytablewriter
patsy==1.0.1
# via statsmodels
peft==0.13.2
# via
# -r requirements-test.in
# lm-eval
pillow==10.4.0
# via
# genai-perf
# matplotlib
# mistral-common
# sentence-transformers
...
...
@@ -338,6 +365,8 @@ platformdirs==4.3.6
# via
# black
# pooch
plotly==5.24.1
# via genai-perf
pluggy==1.5.0
# via pytest
pooch==1.8.2
...
...
@@ -360,7 +389,9 @@ psutil==6.1.0
py==1.11.0
# via pytest-forked
pyarrow==18.0.0
# via datasets
# via
# datasets
# genai-perf
pyasn1==0.6.1
# via rsa
pybind11==2.13.6
...
...
@@ -373,6 +404,8 @@ pydantic[email]==2.9.2
# mistral-common
pydantic-core==2.23.4
# via pydantic
pygments==2.18.0
# via rich
pyparsing==3.2.0
# via matplotlib
pytablewriter==1.2.0
...
...
@@ -381,14 +414,18 @@ pytest==8.3.3
# via
# -r requirements-test.in
# buildkite-test-collector
# genai-perf
# pytest-asyncio
# pytest-forked
# pytest-mock
# pytest-rerunfailures
# pytest-shard
pytest-asyncio==0.24.0
# via -r requirements-test.in
pytest-forked==1.6.0
# via -r requirements-test.in
pytest-mock==3.14.0
# via genai-perf
pytest-rerunfailures==14.0
# via -r requirements-test.in
pytest-shard==0.1.2
...
...
@@ -399,6 +436,8 @@ python-dateutil==2.9.0.post0
# matplotlib
# pandas
# typepy
python-rapidjson==1.20
# via tritonclient
pytz==2024.2
# via
# pandas
...
...
@@ -409,9 +448,11 @@ pyyaml==6.0.2
# awscli
# datamodel-code-generator
# datasets
# genai-perf
# huggingface-hub
# peft
# ray
# responses
# timm
# transformers
ray[adag]==2.40.0
...
...
@@ -438,8 +479,13 @@ requests==2.32.3
# mistral-common
# pooch
# ray
# responses
# tiktoken
# transformers
responses==0.25.3
# via genai-perf
rich==13.9.4
# via genai-perf
rouge-score==0.1.2
# via lm-eval
rpds-py==0.20.1
...
...
@@ -470,6 +516,7 @@ scipy==1.13.1
# librosa
# scikit-learn
# sentence-transformers
# statsmodels
sentence-transformers==3.2.1
# via -r requirements-test.in
sentencepiece==0.2.0
...
...
@@ -490,6 +537,8 @@ soxr==0.5.0.post1
# via librosa
sqlitedict==2.1.0
# via lm-eval
statsmodels==0.14.4
# via genai-perf
sympy==1.13.1
# via torch
tabledata==1.3.3
...
...
@@ -499,7 +548,9 @@ tabulate==0.9.0
tcolorpy==0.1.6
# via pytablewriter
tenacity==9.0.0
# via lm-eval
# via
# lm-eval
# plotly
tensorizer==2.9.0
# via -r requirements-test.in
threadpoolctl==3.5.0
...
...
@@ -540,6 +591,7 @@ tqdm-multiprocess==0.0.11
# via lm-eval
transformers==4.47.0
# via
# genai-perf
# lm-eval
# peft
# sentence-transformers
...
...
@@ -548,6 +600,10 @@ transformers-stream-generator==0.0.5
# via -r requirements-test.in
triton==3.1.0
# via torch
tritonclient==2.51.0
# via
# -r requirements-test.in
# genai-perf
typepy[datetime]==1.3.2
# via
# dataproperty
...
...
@@ -555,6 +611,7 @@ typepy[datetime]==1.3.2
# tabledata
typing-extensions==4.12.2
# via
# bitsandbytes
# huggingface-hub
# librosa
# mistral-common
...
...
@@ -563,10 +620,12 @@ typing-extensions==4.12.2
# torch
tzdata==2024.2
# via pandas
urllib3==
1.26.20
urllib3==
2.2.3
# via
# botocore
# requests
# responses
# tritonclient
word2number==1.1
# via lm-eval
xxhash==3.5.0
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment