Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
5f1d6af2
Unverified
Commit
5f1d6af2
authored
Nov 20, 2024
by
Simon Mo
Committed by
GitHub
Nov 20, 2024
Browse files
[perf bench] H200 development (#9768)
Signed-off-by:
simon-mo
<
simon.mo@hey.com
>
parent
772a6673
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
32 additions
and
7 deletions
+32
-7
.buildkite/nightly-benchmarks/benchmark-pipeline.yaml
.buildkite/nightly-benchmarks/benchmark-pipeline.yaml
+23
-0
.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py
...ly-benchmarks/scripts/convert-results-json-to-markdown.py
+5
-0
.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
.../nightly-benchmarks/scripts/run-performance-benchmarks.sh
+4
-7
No files found.
.buildkite/nightly-benchmarks/benchmark-pipeline.yaml
View file @
5f1d6af2
...
...
@@ -9,7 +9,9 @@ steps:
-
image
:
badouralix/curl-jq
command
:
-
sh .buildkite/nightly-benchmarks/scripts/wait-for-image.sh
-
wait
-
label
:
"
A100"
agents
:
queue
:
A100
...
...
@@ -41,6 +43,27 @@ steps:
-
name
:
devshm
emptyDir
:
medium
:
Memory
-
label
:
"
H200"
agents
:
queue
:
H200
plugins
:
-
docker#v5.12.0
:
image
:
public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
command
:
-
bash
-
.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
mount-buildkite-agent
:
true
propagate-environment
:
true
ipc
:
host
gpus
:
4,5,6,7
volumes
:
-
/data/benchmark-hf-cache:/root/.cache/huggingface
environment
:
-
VLLM_USAGE_SOURCE
-
HF_TOKEN
# - label: "H100"
# agents:
# queue: H100
...
...
.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py
View file @
5f1d6af2
...
...
@@ -157,6 +157,11 @@ if __name__ == "__main__":
throughput_results
,
serving_results
)
# Sort all dataframes by their respective "Test name" columns
for
df
in
[
latency_results
,
serving_results
,
throughput_results
]:
if
not
df
.
empty
:
df
.
sort_values
(
by
=
"Test name"
,
inplace
=
True
)
# get markdown tables
latency_md_table
=
tabulate
(
latency_results
,
headers
=
'keys'
,
...
...
.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
View file @
5f1d6af2
...
...
@@ -6,6 +6,7 @@
# Do not set -e, as the mixtral 8x22B model tends to crash occasionally
# and we still want to see other benchmarking results even when mixtral crashes.
set
-x
set
-o
pipefail
check_gpus
()
{
...
...
@@ -85,11 +86,7 @@ kill_gpu_processes() {
ps
-aux
lsof
-t
-i
:8000 | xargs
-r
kill
-9
pkill
-f
pt_main_thread
# this line doesn't work now
# ps aux | grep python | grep openai | awk '{print $2}' | xargs -r kill -9
pkill
-f
python3
pkill
-f
/usr/bin/python3
pgrep python3 | xargs
-r
kill
-9
# wait until GPU memory usage smaller than 1GB
...
...
@@ -289,7 +286,7 @@ run_serving_tests() {
# run the server
echo
"Running test case
$test_name
"
echo
"Server command:
$server_command
"
eval
"
$server_command
"
&
bash
-c
"
$server_command
"
&
server_pid
=
$!
# wait until the server is alive
...
...
@@ -322,7 +319,7 @@ run_serving_tests() {
echo
"Running test case
$test_name
with qps
$qps
"
echo
"Client command:
$client_command
"
eval
"
$client_command
"
bash
-c
"
$client_command
"
# record the benchmarking commands
jq_output
=
$(
jq
-n
\
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment