Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
3be5b26a
Unverified
Commit
3be5b26a
authored
Nov 07, 2024
by
Russell Bryant
Committed by
GitHub
Nov 07, 2024
Browse files
[CI/Build] Add shell script linting using shellcheck (#7925)
Signed-off-by:
Russell Bryant
<
rbryant@redhat.com
>
parent
de0e61a3
Changes
28
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
157 additions
and
117 deletions
+157
-117
.buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh
.buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh
+3
-3
.buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh
.buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh
+3
-3
.buildkite/lm-eval-harness/run-tests.sh
.buildkite/lm-eval-harness/run-tests.sh
+1
-1
.buildkite/nightly-benchmarks/scripts/launch-server.sh
.buildkite/nightly-benchmarks/scripts/launch-server.sh
+25
-38
.buildkite/nightly-benchmarks/scripts/nightly-annotate.sh
.buildkite/nightly-benchmarks/scripts/nightly-annotate.sh
+6
-6
.buildkite/nightly-benchmarks/scripts/run-nightly-benchmarks.sh
...kite/nightly-benchmarks/scripts/run-nightly-benchmarks.sh
+14
-16
.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
.../nightly-benchmarks/scripts/run-performance-benchmarks.sh
+9
-10
.buildkite/nightly-benchmarks/scripts/wait-for-image.sh
.buildkite/nightly-benchmarks/scripts/wait-for-image.sh
+2
-2
.buildkite/run-amd-test.sh
.buildkite/run-amd-test.sh
+18
-16
.buildkite/run-benchmarks.sh
.buildkite/run-benchmarks.sh
+2
-0
.buildkite/run-cpu-test-ppc64le.sh
.buildkite/run-cpu-test-ppc64le.sh
+3
-1
.buildkite/run-cpu-test.sh
.buildkite/run-cpu-test.sh
+2
-0
.buildkite/run-multi-node-test.sh
.buildkite/run-multi-node-test.sh
+15
-12
.buildkite/run-neuron-test.sh
.buildkite/run-neuron-test.sh
+5
-3
.buildkite/run-openvino-test.sh
.buildkite/run-openvino-test.sh
+2
-0
.buildkite/run-tpu-test.sh
.buildkite/run-tpu-test.sh
+3
-1
.buildkite/run-xpu-test.sh
.buildkite/run-xpu-test.sh
+2
-0
.github/workflows/scripts/cuda-install.sh
.github/workflows/scripts/cuda-install.sh
+4
-4
.github/workflows/scripts/pytorch-install.sh
.github/workflows/scripts/pytorch-install.sh
+1
-1
.github/workflows/shellcheck.yml
.github/workflows/shellcheck.yml
+37
-0
No files found.
.buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh
View file @
3be5b26a
...
@@ -41,6 +41,6 @@ while getopts "m:b:l:f:" OPT; do
...
@@ -41,6 +41,6 @@ while getopts "m:b:l:f:" OPT; do
done
done
lm_eval
--model
hf
\
lm_eval
--model
hf
\
--model_args
pretrained
=
$MODEL
,parallelize
=
True
\
--model_args
"
pretrained=
$MODEL
,parallelize=True
"
\
--tasks
gsm8k
--num_fewshot
$FEWSHOT
--limit
$LIMIT
\
--tasks
gsm8k
--num_fewshot
"
$FEWSHOT
"
--limit
"
$LIMIT
"
\
--batch_size
$BATCH_SIZE
--batch_size
"
$BATCH_SIZE
"
.buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh
View file @
3be5b26a
...
@@ -46,6 +46,6 @@ while getopts "m:b:l:f:t:" OPT; do
...
@@ -46,6 +46,6 @@ while getopts "m:b:l:f:t:" OPT; do
done
done
lm_eval
--model
vllm
\
lm_eval
--model
vllm
\
--model_args
pretrained
=
$MODEL
,tensor_parallel_size
=
$TP_SIZE
,distributed_executor_backend
=
"
ray
"
,trust_remote_code
=
true
,max_model_len
=
4096
\
--model_args
"
pretrained=
$MODEL
,tensor_parallel_size=
$TP_SIZE
,distributed_executor_backend=ray,trust_remote_code=true,max_model_len=4096
"
\
--tasks
gsm8k
--num_fewshot
$FEWSHOT
--limit
$LIMIT
\
--tasks
gsm8k
--num_fewshot
"
$FEWSHOT
"
--limit
"
$LIMIT
"
\
--batch_size
$BATCH_SIZE
--batch_size
"
$BATCH_SIZE
"
.buildkite/lm-eval-harness/run-tests.sh
View file @
3be5b26a
...
@@ -30,7 +30,7 @@ while getopts "c:t:" OPT; do
...
@@ -30,7 +30,7 @@ while getopts "c:t:" OPT; do
done
done
# Parse list of configs.
# Parse list of configs.
IFS
=
$'
\n
'
read
-d
''
-r
-a
MODEL_CONFIGS <
$CONFIG
IFS
=
$'
\n
'
read
-d
''
-r
-a
MODEL_CONFIGS <
"
$CONFIG
"
for
MODEL_CONFIG
in
"
${
MODEL_CONFIGS
[@]
}
"
for
MODEL_CONFIG
in
"
${
MODEL_CONFIGS
[@]
}
"
do
do
...
...
.buildkite/nightly-benchmarks/scripts/launch-server.sh
View file @
3be5b26a
...
@@ -50,31 +50,30 @@ launch_trt_server() {
...
@@ -50,31 +50,30 @@ launch_trt_server() {
git clone https://github.com/triton-inference-server/tensorrtllm_backend.git
git clone https://github.com/triton-inference-server/tensorrtllm_backend.git
git lfs
install
git lfs
install
cd
tensorrtllm_backend
cd
tensorrtllm_backend
git checkout
$trt_llm_version
git checkout
"
$trt_llm_version
"
tensorrtllm_backend_dir
=
$(
pwd
)
git submodule update
--init
--recursive
git submodule update
--init
--recursive
# build trtllm engine
# build trtllm engine
cd
/tensorrtllm_backend
cd
/tensorrtllm_backend
cd
./tensorrt_llm/examples/
${
model_type
}
cd
"
./tensorrt_llm/examples/
${
model_type
}
"
python3 convert_checkpoint.py
\
python3 convert_checkpoint.py
\
--model_dir
${
model_path
}
\
--model_dir
"
${
model_path
}
"
\
--dtype
${
model_dtype
}
\
--dtype
"
${
model_dtype
}
"
\
--tp_size
${
model_tp_size
}
\
--tp_size
"
${
model_tp_size
}
"
\
--output_dir
${
trt_model_path
}
--output_dir
"
${
trt_model_path
}
"
trtllm-build
\
trtllm-build
\
--checkpoint_dir
${
trt_model_path
}
\
--checkpoint_dir
"
${
trt_model_path
}
"
\
--use_fused_mlp
\
--use_fused_mlp
\
--reduce_fusion
disable
\
--reduce_fusion
disable
\
--workers
8
\
--workers
8
\
--gpt_attention_plugin
${
model_dtype
}
\
--gpt_attention_plugin
"
${
model_dtype
}
"
\
--gemm_plugin
${
model_dtype
}
\
--gemm_plugin
"
${
model_dtype
}
"
\
--tp_size
${
model_tp_size
}
\
--tp_size
"
${
model_tp_size
}
"
\
--max_batch_size
${
max_batch_size
}
\
--max_batch_size
"
${
max_batch_size
}
"
\
--max_input_len
${
max_input_len
}
\
--max_input_len
"
${
max_input_len
}
"
\
--max_seq_len
${
max_seq_len
}
\
--max_seq_len
"
${
max_seq_len
}
"
\
--max_num_tokens
${
max_num_tokens
}
\
--max_num_tokens
"
${
max_num_tokens
}
"
\
--output_dir
${
trt_engine_path
}
--output_dir
"
${
trt_engine_path
}
"
# handle triton protobuf files and launch triton server
# handle triton protobuf files and launch triton server
cd
/tensorrtllm_backend
cd
/tensorrtllm_backend
...
@@ -82,15 +81,15 @@ launch_trt_server() {
...
@@ -82,15 +81,15 @@ launch_trt_server() {
cp
-r
all_models/inflight_batcher_llm/
*
triton_model_repo/
cp
-r
all_models/inflight_batcher_llm/
*
triton_model_repo/
cd
triton_model_repo
cd
triton_model_repo
rm
-rf
./tensorrt_llm/1/
*
rm
-rf
./tensorrt_llm/1/
*
cp
-r
${
trt_engine_path
}
/
*
./tensorrt_llm/1
cp
-r
"
${
trt_engine_path
}
"
/
*
./tensorrt_llm/1
python3 ../tools/fill_template.py
-i
tensorrt_llm/config.pbtxt triton_backend:tensorrtllm,engine_dir:/tensorrtllm_backend/triton_model_repo/tensorrt_llm/1,decoupled_mode:true,batching_strategy:inflight_fused_batching,batch_scheduler_policy:guaranteed_no_evict,exclude_input_in_output:true,triton_max_batch_size:2048,max_queue_delay_microseconds:0,max_beam_width:1,max_queue_size:2048,enable_kv_cache_reuse:false
python3 ../tools/fill_template.py
-i
tensorrt_llm/config.pbtxt triton_backend:tensorrtllm,engine_dir:/tensorrtllm_backend/triton_model_repo/tensorrt_llm/1,decoupled_mode:true,batching_strategy:inflight_fused_batching,batch_scheduler_policy:guaranteed_no_evict,exclude_input_in_output:true,triton_max_batch_size:2048,max_queue_delay_microseconds:0,max_beam_width:1,max_queue_size:2048,enable_kv_cache_reuse:false
python3 ../tools/fill_template.py
-i
preprocessing/config.pbtxt triton_max_batch_size:2048,tokenizer_dir:
$model_path
,preprocessing_instance_count:5
python3 ../tools/fill_template.py
-i
preprocessing/config.pbtxt
"
triton_max_batch_size:2048,tokenizer_dir:
$model_path
,preprocessing_instance_count:5
"
python3 ../tools/fill_template.py
-i
postprocessing/config.pbtxt triton_max_batch_size:2048,tokenizer_dir:
$model_path
,postprocessing_instance_count:5,skip_special_tokens:false
python3 ../tools/fill_template.py
-i
postprocessing/config.pbtxt
"
triton_max_batch_size:2048,tokenizer_dir:
$model_path
,postprocessing_instance_count:5,skip_special_tokens:false
"
python3 ../tools/fill_template.py
-i
ensemble/config.pbtxt triton_max_batch_size:
$max_batch_size
python3 ../tools/fill_template.py
-i
ensemble/config.pbtxt triton_max_batch_size:
"
$max_batch_size
"
python3 ../tools/fill_template.py
-i
tensorrt_llm_bls/config.pbtxt triton_max_batch_size:
$max_batch_size
,decoupled_mode:true,accumulate_tokens:
"
False
"
,bls_instance_count:1
python3 ../tools/fill_template.py
-i
tensorrt_llm_bls/config.pbtxt
"
triton_max_batch_size:
$max_batch_size
,decoupled_mode:true,accumulate_tokens:False,bls_instance_count:1
"
cd
/tensorrtllm_backend
cd
/tensorrtllm_backend
python3 scripts/launch_triton_server.py
\
python3 scripts/launch_triton_server.py
\
--world_size
=
${
model_tp_size
}
\
--world_size
=
"
${
model_tp_size
}
"
\
--model_repo
=
/tensorrtllm_backend/triton_model_repo &
--model_repo
=
/tensorrtllm_backend/triton_model_repo &
}
}
...
@@ -98,10 +97,7 @@ launch_trt_server() {
...
@@ -98,10 +97,7 @@ launch_trt_server() {
launch_tgi_server
()
{
launch_tgi_server
()
{
model
=
$(
echo
"
$common_params
"
| jq
-r
'.model'
)
model
=
$(
echo
"
$common_params
"
| jq
-r
'.model'
)
tp
=
$(
echo
"
$common_params
"
| jq
-r
'.tp'
)
tp
=
$(
echo
"
$common_params
"
| jq
-r
'.tp'
)
dataset_name
=
$(
echo
"
$common_params
"
| jq
-r
'.dataset_name'
)
dataset_path
=
$(
echo
"
$common_params
"
| jq
-r
'.dataset_path'
)
port
=
$(
echo
"
$common_params
"
| jq
-r
'.port'
)
port
=
$(
echo
"
$common_params
"
| jq
-r
'.port'
)
num_prompts
=
$(
echo
"
$common_params
"
| jq
-r
'.num_prompts'
)
server_args
=
$(
json2args
"
$server_params
"
)
server_args
=
$(
json2args
"
$server_params
"
)
if
echo
"
$common_params
"
| jq
-e
'has("fp8")'
>
/dev/null
;
then
if
echo
"
$common_params
"
| jq
-e
'has("fp8")'
>
/dev/null
;
then
...
@@ -129,10 +125,7 @@ launch_tgi_server() {
...
@@ -129,10 +125,7 @@ launch_tgi_server() {
launch_lmdeploy_server
()
{
launch_lmdeploy_server
()
{
model
=
$(
echo
"
$common_params
"
| jq
-r
'.model'
)
model
=
$(
echo
"
$common_params
"
| jq
-r
'.model'
)
tp
=
$(
echo
"
$common_params
"
| jq
-r
'.tp'
)
tp
=
$(
echo
"
$common_params
"
| jq
-r
'.tp'
)
dataset_name
=
$(
echo
"
$common_params
"
| jq
-r
'.dataset_name'
)
dataset_path
=
$(
echo
"
$common_params
"
| jq
-r
'.dataset_path'
)
port
=
$(
echo
"
$common_params
"
| jq
-r
'.port'
)
port
=
$(
echo
"
$common_params
"
| jq
-r
'.port'
)
num_prompts
=
$(
echo
"
$common_params
"
| jq
-r
'.num_prompts'
)
server_args
=
$(
json2args
"
$server_params
"
)
server_args
=
$(
json2args
"
$server_params
"
)
server_command
=
"lmdeploy serve api_server
$model
\
server_command
=
"lmdeploy serve api_server
$model
\
...
@@ -149,10 +142,7 @@ launch_sglang_server() {
...
@@ -149,10 +142,7 @@ launch_sglang_server() {
model
=
$(
echo
"
$common_params
"
| jq
-r
'.model'
)
model
=
$(
echo
"
$common_params
"
| jq
-r
'.model'
)
tp
=
$(
echo
"
$common_params
"
| jq
-r
'.tp'
)
tp
=
$(
echo
"
$common_params
"
| jq
-r
'.tp'
)
dataset_name
=
$(
echo
"
$common_params
"
| jq
-r
'.dataset_name'
)
dataset_path
=
$(
echo
"
$common_params
"
| jq
-r
'.dataset_path'
)
port
=
$(
echo
"
$common_params
"
| jq
-r
'.port'
)
port
=
$(
echo
"
$common_params
"
| jq
-r
'.port'
)
num_prompts
=
$(
echo
"
$common_params
"
| jq
-r
'.num_prompts'
)
server_args
=
$(
json2args
"
$server_params
"
)
server_args
=
$(
json2args
"
$server_params
"
)
if
echo
"
$common_params
"
| jq
-e
'has("fp8")'
>
/dev/null
;
then
if
echo
"
$common_params
"
| jq
-e
'has("fp8")'
>
/dev/null
;
then
...
@@ -185,10 +175,7 @@ launch_vllm_server() {
...
@@ -185,10 +175,7 @@ launch_vllm_server() {
model
=
$(
echo
"
$common_params
"
| jq
-r
'.model'
)
model
=
$(
echo
"
$common_params
"
| jq
-r
'.model'
)
tp
=
$(
echo
"
$common_params
"
| jq
-r
'.tp'
)
tp
=
$(
echo
"
$common_params
"
| jq
-r
'.tp'
)
dataset_name
=
$(
echo
"
$common_params
"
| jq
-r
'.dataset_name'
)
dataset_path
=
$(
echo
"
$common_params
"
| jq
-r
'.dataset_path'
)
port
=
$(
echo
"
$common_params
"
| jq
-r
'.port'
)
port
=
$(
echo
"
$common_params
"
| jq
-r
'.port'
)
num_prompts
=
$(
echo
"
$common_params
"
| jq
-r
'.num_prompts'
)
server_args
=
$(
json2args
"
$server_params
"
)
server_args
=
$(
json2args
"
$server_params
"
)
if
echo
"
$common_params
"
| jq
-e
'has("fp8")'
>
/dev/null
;
then
if
echo
"
$common_params
"
| jq
-e
'has("fp8")'
>
/dev/null
;
then
...
@@ -217,19 +204,19 @@ launch_vllm_server() {
...
@@ -217,19 +204,19 @@ launch_vllm_server() {
main
()
{
main
()
{
if
[[
$CURRENT_LLM_SERVING_ENGINE
==
"trt"
]]
;
then
if
[[
"
$CURRENT_LLM_SERVING_ENGINE
"
==
"trt"
]]
;
then
launch_trt_server
launch_trt_server
fi
fi
if
[[
$CURRENT_LLM_SERVING_ENGINE
==
"tgi"
]]
;
then
if
[[
"
$CURRENT_LLM_SERVING_ENGINE
"
==
"tgi"
]]
;
then
launch_tgi_server
launch_tgi_server
fi
fi
if
[[
$CURRENT_LLM_SERVING_ENGINE
==
"lmdeploy"
]]
;
then
if
[[
"
$CURRENT_LLM_SERVING_ENGINE
"
==
"lmdeploy"
]]
;
then
launch_lmdeploy_server
launch_lmdeploy_server
fi
fi
if
[[
$CURRENT_LLM_SERVING_ENGINE
==
"sglang"
]]
;
then
if
[[
"
$CURRENT_LLM_SERVING_ENGINE
"
==
"sglang"
]]
;
then
launch_sglang_server
launch_sglang_server
fi
fi
...
...
.buildkite/nightly-benchmarks/scripts/nightly-annotate.sh
View file @
3be5b26a
...
@@ -16,10 +16,10 @@ main() {
...
@@ -16,10 +16,10 @@ main() {
fi
fi
# initial annotation
# initial annotation
description
=
"
$VLLM_SOURCE_CODE_LOC
/.buildkite/nightly-benchmarks/nightly-descriptions.md"
#
description="$VLLM_SOURCE_CODE_LOC/.buildkite/nightly-benchmarks/nightly-descriptions.md"
# download results
# download results
cd
$VLLM_SOURCE_CODE_LOC
/benchmarks
cd
"
$VLLM_SOURCE_CODE_LOC
/benchmarks
"
mkdir
-p
results/
mkdir
-p
results/
/workspace/buildkite-agent artifact download
'results/*nightly_results.json'
results/
/workspace/buildkite-agent artifact download
'results/*nightly_results.json'
results/
ls
ls
...
@@ -30,15 +30,15 @@ main() {
...
@@ -30,15 +30,15 @@ main() {
/workspace/buildkite-agent artifact upload
"results.zip"
/workspace/buildkite-agent artifact upload
"results.zip"
# upload benchmarking scripts
# upload benchmarking scripts
cd
$VLLM_SOURCE_CODE_LOC
/
cd
"
$VLLM_SOURCE_CODE_LOC
/
"
zip
-r
nightly-benchmarks.zip .buildkite/ benchmarks/
zip
-r
nightly-benchmarks.zip .buildkite/ benchmarks/
/workspace/buildkite-agent artifact upload
"nightly-benchmarks.zip"
/workspace/buildkite-agent artifact upload
"nightly-benchmarks.zip"
cd
$VLLM_SOURCE_CODE_LOC
/.buildkite/nightly-benchmarks/
cd
"
$VLLM_SOURCE_CODE_LOC
/.buildkite/nightly-benchmarks/
"
# upload benchmarking pipeline
# upload benchmarking pipeline
/workspace/buildkite-agent artifact upload
"nightly-pipeline.yaml"
/workspace/buildkite-agent artifact upload
"nightly-pipeline.yaml"
cd
$VLLM_SOURCE_CODE_LOC
/.buildkite/nightly-benchmarks/
cd
"
$VLLM_SOURCE_CODE_LOC
/.buildkite/nightly-benchmarks/
"
/workspace/buildkite-agent annotate
--style
"success"
--context
"nightly-benchmarks-results"
--append
< nightly-annotation.md
/workspace/buildkite-agent annotate
--style
"success"
--context
"nightly-benchmarks-results"
--append
< nightly-annotation.md
...
...
.buildkite/nightly-benchmarks/scripts/run-nightly-benchmarks.sh
View file @
3be5b26a
...
@@ -12,7 +12,7 @@ check_gpus() {
...
@@ -12,7 +12,7 @@ check_gpus() {
echo
"Need at least 1 GPU to run benchmarking."
echo
"Need at least 1 GPU to run benchmarking."
exit
1
exit
1
fi
fi
declare
-g
gpu_type
=
$(
echo
$(
nvidia-smi
--query-gpu
=
name
--format
=
csv,noheader
)
|
awk
'{print $2}'
)
declare
-g
gpu_type
=
"
$(
nvidia-smi
--query-gpu
=
name
--format
=
csv,noheader |
awk
'{print $2}'
)
"
echo
"GPU type is
$gpu_type
"
echo
"GPU type is
$gpu_type
"
}
}
...
@@ -102,7 +102,7 @@ kill_gpu_processes() {
...
@@ -102,7 +102,7 @@ kill_gpu_processes() {
pkill
-f
text-generation
pkill
-f
text-generation
pkill
-f
lmdeploy
pkill
-f
lmdeploy
while
[
$(
nvidia-smi
--query-gpu
=
memory.used
--format
=
csv,noheader,nounits |
head
-n
1
)
-ge
1000
]
;
do
while
[
"
$(
nvidia-smi
--query-gpu
=
memory.used
--format
=
csv,noheader,nounits |
head
-n
1
)
"
-ge
1000
]
;
do
sleep
1
sleep
1
done
done
}
}
...
@@ -119,8 +119,8 @@ wait_for_server() {
...
@@ -119,8 +119,8 @@ wait_for_server() {
ensure_installed
()
{
ensure_installed
()
{
# Ensure that the given command is installed by apt-get
# Ensure that the given command is installed by apt-get
local
cmd
=
$1
local
cmd
=
$1
if
!
which
$cmd
>
/dev/null
;
then
if
!
which
"
$cmd
"
>
/dev/null
;
then
apt-get update
&&
apt-get
install
-y
$cmd
apt-get update
&&
apt-get
install
-y
"
$cmd
"
fi
fi
}
}
...
@@ -173,13 +173,11 @@ run_serving_tests() {
...
@@ -173,13 +173,11 @@ run_serving_tests() {
echo
"Reuse previous server for test case
$test_name
"
echo
"Reuse previous server for test case
$test_name
"
else
else
kill_gpu_processes
kill_gpu_processes
bash
$VLLM_SOURCE_CODE_LOC
/.buildkite/nightly-benchmarks/scripts/launch-server.sh
\
bash
"
$VLLM_SOURCE_CODE_LOC
/.buildkite/nightly-benchmarks/scripts/launch-server.sh
"
\
"
$server_params
"
"
$common_params
"
"
$server_params
"
"
$common_params
"
fi
fi
wait_for_server
if
wait_for_server
;
then
if
[
$?
-eq
0
]
;
then
echo
""
echo
""
echo
"
$CURRENT_LLM_SERVING_ENGINE
server is up and running."
echo
"
$CURRENT_LLM_SERVING_ENGINE
server is up and running."
else
else
...
@@ -190,13 +188,13 @@ run_serving_tests() {
...
@@ -190,13 +188,13 @@ run_serving_tests() {
# prepare tokenizer
# prepare tokenizer
# this is required for lmdeploy.
# this is required for lmdeploy.
cd
$VLLM_SOURCE_CODE_LOC
/benchmarks
cd
"
$VLLM_SOURCE_CODE_LOC
/benchmarks
"
rm
-rf
/tokenizer_cache
rm
-rf
/tokenizer_cache
mkdir
/tokenizer_cache
mkdir
/tokenizer_cache
python3 ../.buildkite/nightly-benchmarks/scripts/download-tokenizer.py
\
python3 ../.buildkite/nightly-benchmarks/scripts/download-tokenizer.py
\
--model
"
$model
"
\
--model
"
$model
"
\
--cachedir
/tokenizer_cache
--cachedir
/tokenizer_cache
cd
$VLLM_SOURCE_CODE_LOC
/benchmarks
cd
"
$VLLM_SOURCE_CODE_LOC
/benchmarks
"
# change model name for lmdeploy (it will not follow standard hf name)
# change model name for lmdeploy (it will not follow standard hf name)
...
@@ -307,11 +305,11 @@ run_serving_tests() {
...
@@ -307,11 +305,11 @@ run_serving_tests() {
prepare_dataset
()
{
prepare_dataset
()
{
# download sharegpt dataset
# download sharegpt dataset
cd
$VLLM_SOURCE_CODE_LOC
/benchmarks
cd
"
$VLLM_SOURCE_CODE_LOC
/benchmarks
"
wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
# duplicate sonnet by 4x, to allow benchmarking with input length 2048
# duplicate sonnet by 4x, to allow benchmarking with input length 2048
cd
$VLLM_SOURCE_CODE_LOC
/benchmarks
cd
"
$VLLM_SOURCE_CODE_LOC
/benchmarks
"
echo
""
>
sonnet_4x.txt
echo
""
>
sonnet_4x.txt
for
_
in
{
1..4
}
for
_
in
{
1..4
}
do
do
...
@@ -339,17 +337,17 @@ main() {
...
@@ -339,17 +337,17 @@ main() {
prepare_dataset
prepare_dataset
cd
$VLLM_SOURCE_CODE_LOC
/benchmarks
cd
"
$VLLM_SOURCE_CODE_LOC
/benchmarks
"
declare
-g
RESULTS_FOLDER
=
results/
declare
-g
RESULTS_FOLDER
=
results/
mkdir
-p
$RESULTS_FOLDER
mkdir
-p
$RESULTS_FOLDER
BENCHMARK_ROOT
=
$VLLM_SOURCE_CODE_LOC
/.buildkite/nightly-benchmarks/
BENCHMARK_ROOT
=
"
$VLLM_SOURCE_CODE_LOC
/.buildkite/nightly-benchmarks/
"
# run the test
# run the test
run_serving_tests
$BENCHMARK_ROOT
/tests/nightly-tests.json
run_serving_tests
"
$BENCHMARK_ROOT
/tests/nightly-tests.json
"
# upload benchmark results to buildkite
# upload benchmark results to buildkite
python3
-m
pip
install
tabulate pandas
python3
-m
pip
install
tabulate pandas
python3
$BENCHMARK_ROOT
/scripts/summary-nightly-results.py
python3
"
$BENCHMARK_ROOT
/scripts/summary-nightly-results.py
"
upload_to_buildkite
upload_to_buildkite
}
}
...
...
.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
View file @
3be5b26a
...
@@ -17,7 +17,7 @@ check_gpus() {
...
@@ -17,7 +17,7 @@ check_gpus() {
echo
"Need at least 1 GPU to run benchmarking."
echo
"Need at least 1 GPU to run benchmarking."
exit
1
exit
1
fi
fi
declare
-g
gpu_type
=
$(
echo
$(
nvidia-smi
--query-gpu
=
name
--format
=
csv,noheader
)
|
awk
'{print $2}'
)
declare
-g
gpu_type
=
$(
nvidia-smi
--query-gpu
=
name
--format
=
csv,noheader |
awk
'{print $2}'
)
echo
"GPU type is
$gpu_type
"
echo
"GPU type is
$gpu_type
"
}
}
...
@@ -93,7 +93,7 @@ kill_gpu_processes() {
...
@@ -93,7 +93,7 @@ kill_gpu_processes() {
# wait until GPU memory usage smaller than 1GB
# wait until GPU memory usage smaller than 1GB
while
[
$(
nvidia-smi
--query-gpu
=
memory.used
--format
=
csv,noheader,nounits |
head
-n
1
)
-ge
1000
]
;
do
while
[
"
$(
nvidia-smi
--query-gpu
=
memory.used
--format
=
csv,noheader,nounits |
head
-n
1
)
"
-ge
1000
]
;
do
sleep
1
sleep
1
done
done
...
@@ -117,7 +117,7 @@ upload_to_buildkite() {
...
@@ -117,7 +117,7 @@ upload_to_buildkite() {
fi
fi
# Use the determined command to annotate and upload artifacts
# Use the determined command to annotate and upload artifacts
$BUILDKITE_AGENT_COMMAND
annotate
--style
"info"
--context
"
$BUILDKITE_LABEL
-benchmark-results"
<
$RESULTS_FOLDER
/benchmark_results.md
$BUILDKITE_AGENT_COMMAND
annotate
--style
"info"
--context
"
$BUILDKITE_LABEL
-benchmark-results"
<
"
$RESULTS_FOLDER
/benchmark_results.md
"
$BUILDKITE_AGENT_COMMAND
artifact upload
"
$RESULTS_FOLDER
/*"
$BUILDKITE_AGENT_COMMAND
artifact upload
"
$RESULTS_FOLDER
/*"
}
}
...
@@ -150,7 +150,7 @@ run_latency_tests() {
...
@@ -150,7 +150,7 @@ run_latency_tests() {
# check if there is enough GPU to run the test
# check if there is enough GPU to run the test
tp
=
$(
echo
"
$latency_params
"
| jq
-r
'.tensor_parallel_size'
)
tp
=
$(
echo
"
$latency_params
"
| jq
-r
'.tensor_parallel_size'
)
if
[[
$gpu_count
-lt
$tp
]]
;
then
if
[[
$gpu_count
-lt
$tp
]]
;
then
echo
"Required tensor-parallel-size
$tp
but only
$gpu_count
GPU found. Skip testcase
$testname
."
echo
"Required tensor-parallel-size
$tp
but only
$gpu_count
GPU found. Skip testcase
$test
_
name
."
continue
continue
fi
fi
...
@@ -206,9 +206,9 @@ run_throughput_tests() {
...
@@ -206,9 +206,9 @@ run_throughput_tests() {
throughput_args
=
$(
json2args
"
$throughput_params
"
)
throughput_args
=
$(
json2args
"
$throughput_params
"
)
# check if there is enough GPU to run the test
# check if there is enough GPU to run the test
tp
=
$(
echo
$throughput_params
| jq
-r
'.tensor_parallel_size'
)
tp
=
$(
echo
"
$throughput_params
"
| jq
-r
'.tensor_parallel_size'
)
if
[[
$gpu_count
-lt
$tp
]]
;
then
if
[[
$gpu_count
-lt
$tp
]]
;
then
echo
"Required tensor-parallel-size
$tp
but only
$gpu_count
GPU found. Skip testcase
$testname
."
echo
"Required tensor-parallel-size
$tp
but only
$gpu_count
GPU found. Skip testcase
$test
_
name
."
continue
continue
fi
fi
...
@@ -270,7 +270,7 @@ run_serving_tests() {
...
@@ -270,7 +270,7 @@ run_serving_tests() {
# check if there is enough GPU to run the test
# check if there is enough GPU to run the test
tp
=
$(
echo
"
$server_params
"
| jq
-r
'.tensor_parallel_size'
)
tp
=
$(
echo
"
$server_params
"
| jq
-r
'.tensor_parallel_size'
)
if
[[
$gpu_count
-lt
$tp
]]
;
then
if
[[
$gpu_count
-lt
$tp
]]
;
then
echo
"Required tensor-parallel-size
$tp
but only
$gpu_count
GPU found. Skip testcase
$testname
."
echo
"Required tensor-parallel-size
$tp
but only
$gpu_count
GPU found. Skip testcase
$test
_
name
."
continue
continue
fi
fi
...
@@ -278,7 +278,7 @@ run_serving_tests() {
...
@@ -278,7 +278,7 @@ run_serving_tests() {
server_model
=
$(
echo
"
$server_params
"
| jq
-r
'.model'
)
server_model
=
$(
echo
"
$server_params
"
| jq
-r
'.model'
)
client_model
=
$(
echo
"
$client_params
"
| jq
-r
'.model'
)
client_model
=
$(
echo
"
$client_params
"
| jq
-r
'.model'
)
if
[[
$server_model
!=
"
$client_model
"
]]
;
then
if
[[
$server_model
!=
"
$client_model
"
]]
;
then
echo
"Server model and client model must be the same. Skip testcase
$testname
."
echo
"Server model and client model must be the same. Skip testcase
$test
_
name
."
continue
continue
fi
fi
...
@@ -293,8 +293,7 @@ run_serving_tests() {
...
@@ -293,8 +293,7 @@ run_serving_tests() {
server_pid
=
$!
server_pid
=
$!
# wait until the server is alive
# wait until the server is alive
wait_for_server
if
wait_for_server
;
then
if
[
$?
-eq
0
]
;
then
echo
""
echo
""
echo
"vllm server is up and running."
echo
"vllm server is up and running."
else
else
...
...
.buildkite/nightly-benchmarks/scripts/wait-for-image.sh
View file @
3be5b26a
...
@@ -6,7 +6,7 @@ TIMEOUT_SECONDS=10
...
@@ -6,7 +6,7 @@ TIMEOUT_SECONDS=10
retries
=
0
retries
=
0
while
[
$retries
-lt
1000
]
;
do
while
[
$retries
-lt
1000
]
;
do
if
[
$(
curl
-s
--max-time
$TIMEOUT_SECONDS
-L
-H
"Authorization: Bearer
$TOKEN
"
-o
/dev/null
-w
"%{http_code}"
$URL
)
-eq
200
]
;
then
if
[
"
$(
curl
-s
--max-time
"
$TIMEOUT_SECONDS
"
-L
-H
"Authorization: Bearer
$TOKEN
"
-o
/dev/null
-w
"%{http_code}"
"
$URL
"
)
"
-eq
200
]
;
then
exit
0
exit
0
fi
fi
...
...
.buildkite/run-amd-test.sh
View file @
3be5b26a
#!/bin/bash
# This script runs test inside the corresponding ROCm docker container.
# This script runs test inside the corresponding ROCm docker container.
set
-o
pipefail
set
-o
pipefail
...
@@ -57,17 +59,17 @@ done
...
@@ -57,17 +59,17 @@ done
echo
"--- Pulling container"
echo
"--- Pulling container"
image_name
=
"rocm/vllm-ci:
${
BUILDKITE_COMMIT
}
"
image_name
=
"rocm/vllm-ci:
${
BUILDKITE_COMMIT
}
"
container_name
=
"rocm_
${
BUILDKITE_COMMIT
}
_
$(
tr
-dc
A-Za-z0-9 < /dev/urandom |
head
-c
10
;
echo
)
"
container_name
=
"rocm_
${
BUILDKITE_COMMIT
}
_
$(
tr
-dc
A-Za-z0-9 < /dev/urandom |
head
-c
10
;
echo
)
"
docker pull
${
image_name
}
docker pull
"
${
image_name
}
"
remove_docker_container
()
{
remove_docker_container
()
{
docker
rm
-f
${
container_name
}
||
docker image
rm
-f
${
image_name
}
||
true
docker
rm
-f
"
${
container_name
}
"
||
docker image
rm
-f
"
${
image_name
}
"
||
true
}
}
trap
remove_docker_container EXIT
trap
remove_docker_container EXIT
echo
"--- Running container"
echo
"--- Running container"
HF_CACHE
=
"
$(
realpath
~
)
/huggingface"
HF_CACHE
=
"
$(
realpath
~
)
/huggingface"
mkdir
-p
${
HF_CACHE
}
mkdir
-p
"
${
HF_CACHE
}
"
HF_MOUNT
=
"/root/.cache/huggingface"
HF_MOUNT
=
"/root/.cache/huggingface"
commands
=
$@
commands
=
$@
...
@@ -118,25 +120,25 @@ if [[ $commands == *"--shard-id="* ]]; then
...
@@ -118,25 +120,25 @@ if [[ $commands == *"--shard-id="* ]]; then
--network
host
\
--network
host
\
--shm-size
=
16gb
\
--shm-size
=
16gb
\
--rm
\
--rm
\
-e
HIP_VISIBLE_DEVICES
=
${
GPU
}
\
-e
HIP_VISIBLE_DEVICES
=
"
${
GPU
}
"
\
-e
HF_TOKEN
\
-e
HF_TOKEN
\
-v
${
HF_CACHE
}
:
${
HF_MOUNT
}
\
-v
"
${
HF_CACHE
}
:
${
HF_MOUNT
}
"
\
-e
HF_HOME
=
${
HF_MOUNT
}
\
-e
"
HF_HOME=
${
HF_MOUNT
}
"
\
--name
${
container_name
}
_
${
GPU
}
\
--name
"
${
container_name
}
_
${
GPU
}
"
\
${
image_name
}
\
"
${
image_name
}
"
\
/bin/bash
-c
"
${
commands_gpu
}
"
\
/bin/bash
-c
"
${
commands_gpu
}
"
\
|&
while
read
-r
line
;
do
echo
">>Shard
$GPU
:
$line
"
;
done
&
|&
while
read
-r
line
;
do
echo
">>Shard
$GPU
:
$line
"
;
done
&
PIDS+
=(
$!
)
PIDS+
=(
$!
)
done
done
#wait for all processes to finish and collect exit codes
#wait for all processes to finish and collect exit codes
for
pid
in
${
PIDS
[@]
}
;
do
for
pid
in
"
${
PIDS
[@]
}
"
;
do
wait
${
pid
}
wait
"
${
pid
}
"
STATUS+
=(
$?
)
STATUS+
=(
$?
)
done
done
for
st
in
${
STATUS
[@]
}
;
do
for
st
in
"
${
STATUS
[@]
}
"
;
do
if
[[
${
st
}
-ne
0
]]
;
then
if
[[
${
st
}
-ne
0
]]
;
then
echo
"One of the processes failed with
$st
"
echo
"One of the processes failed with
$st
"
exit
${
st
}
exit
"
${
st
}
"
fi
fi
done
done
else
else
...
@@ -147,9 +149,9 @@ else
...
@@ -147,9 +149,9 @@ else
--rm
\
--rm
\
-e
HIP_VISIBLE_DEVICES
=
0
\
-e
HIP_VISIBLE_DEVICES
=
0
\
-e
HF_TOKEN
\
-e
HF_TOKEN
\
-v
${
HF_CACHE
}
:
${
HF_MOUNT
}
\
-v
"
${
HF_CACHE
}
:
${
HF_MOUNT
}
"
\
-e
HF_HOME
=
${
HF_MOUNT
}
\
-e
"
HF_HOME=
${
HF_MOUNT
}
"
\
--name
${
container_name
}
\
--name
"
${
container_name
}
"
\
${
image_name
}
\
"
${
image_name
}
"
\
/bin/bash
-c
"
${
commands
}
"
/bin/bash
-c
"
${
commands
}
"
fi
fi
.buildkite/run-benchmarks.sh
View file @
3be5b26a
#!/bin/bash
# This script is run by buildkite to run the benchmarks and upload the results to buildkite
# This script is run by buildkite to run the benchmarks and upload the results to buildkite
set
-ex
set
-ex
...
...
.buildkite/run-cpu-test-ppc64le.sh
View file @
3be5b26a
#!/bin/bash
# This script build the CPU docker image and run the offline inference inside the container.
# This script build the CPU docker image and run the offline inference inside the container.
# It serves a sanity check for compilation and basic model usage.
# It serves a sanity check for compilation and basic model usage.
set
-ex
set
-ex
...
@@ -13,7 +15,7 @@ remove_docker_container
...
@@ -13,7 +15,7 @@ remove_docker_container
# Run the image, setting --shm-size=4g for tensor parallel.
# Run the image, setting --shm-size=4g for tensor parallel.
source
/etc/environment
source
/etc/environment
#docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --privileged=true --network host -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --shm-size=4g --name cpu-test cpu-test
#docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --privileged=true --network host -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --shm-size=4g --name cpu-test cpu-test
docker run
-itd
--entrypoint
/bin/bash
-v
~/.cache/huggingface:/root/.cache/huggingface
--privileged
=
true
--network
host
-e
HF_TOKEN
=
$HF_TOKEN
--name
cpu-test cpu-test
docker run
-itd
--entrypoint
/bin/bash
-v
~/.cache/huggingface:/root/.cache/huggingface
--privileged
=
true
--network
host
-e
HF_TOKEN
=
"
$HF_TOKEN
"
--name
cpu-test cpu-test
# Run basic model test
# Run basic model test
docker
exec
cpu-test bash
-c
"
docker
exec
cpu-test bash
-c
"
...
...
.buildkite/run-cpu-test.sh
View file @
3be5b26a
#!/bin/bash
# This script build the CPU docker image and run the offline inference inside the container.
# This script build the CPU docker image and run the offline inference inside the container.
# It serves a sanity check for compilation and basic model usage.
# It serves a sanity check for compilation and basic model usage.
set
-ex
set
-ex
...
...
.buildkite/run-multi-node-test.sh
View file @
3be5b26a
...
@@ -14,7 +14,7 @@ DOCKER_IMAGE=$4
...
@@ -14,7 +14,7 @@ DOCKER_IMAGE=$4
shift
4
shift
4
COMMANDS
=(
"
$@
"
)
COMMANDS
=(
"
$@
"
)
if
[
${#
COMMANDS
[@]
}
-ne
$NUM_NODES
]
;
then
if
[
${#
COMMANDS
[@]
}
-ne
"
$NUM_NODES
"
]
;
then
echo
"The number of commands must be equal to the number of nodes."
echo
"The number of commands must be equal to the number of nodes."
echo
"Number of nodes:
$NUM_NODES
"
echo
"Number of nodes:
$NUM_NODES
"
echo
"Number of commands:
${#
COMMANDS
[@]
}
"
echo
"Number of commands:
${#
COMMANDS
[@]
}
"
...
@@ -23,7 +23,7 @@ fi
...
@@ -23,7 +23,7 @@ fi
echo
"List of commands"
echo
"List of commands"
for
command
in
"
${
COMMANDS
[@]
}
"
;
do
for
command
in
"
${
COMMANDS
[@]
}
"
;
do
echo
$command
echo
"
$command
"
done
done
start_network
()
{
start_network
()
{
...
@@ -36,7 +36,7 @@ start_nodes() {
...
@@ -36,7 +36,7 @@ start_nodes() {
for
node_gpu
in
$(
seq
0
$((
$NUM_GPUS
-
1
))
)
;
do
for
node_gpu
in
$(
seq
0
$((
$NUM_GPUS
-
1
))
)
;
do
DEVICE_NUM
=
$((
$node
*
$NUM_GPUS
+
$node_gpu
))
DEVICE_NUM
=
$((
$node
*
$NUM_GPUS
+
$node_gpu
))
GPU_DEVICES+
=
$((
$DEVICE_NUM
))
GPU_DEVICES+
=
$((
$DEVICE_NUM
))
if
[
$node_gpu
-lt
$((
$NUM_GPUS
-
1
))
]
;
then
if
[
"
$node_gpu
"
-lt
$((
$NUM_GPUS
-
1
))
]
;
then
GPU_DEVICES+
=
','
GPU_DEVICES+
=
','
fi
fi
done
done
...
@@ -49,17 +49,20 @@ start_nodes() {
...
@@ -49,17 +49,20 @@ start_nodes() {
# 3. map the huggingface cache directory to the container
# 3. map the huggingface cache directory to the container
# 3. assign ip addresses to the containers (head node: 192.168.10.10, worker nodes:
# 3. assign ip addresses to the containers (head node: 192.168.10.10, worker nodes:
# starting from 192.168.10.11)
# starting from 192.168.10.11)
docker run
-d
--gpus
"
$GPU_DEVICES
"
--shm-size
=
10.24gb
-e
HF_TOKEN
-v
~/.cache/huggingface:/root/.cache/huggingface
--name
node
$node
--network
docker-net
--ip
192.168.10.
$((
10
+
$node
))
--rm
$DOCKER_IMAGE
/bin/bash
-c
"tail -f /dev/null"
docker run
-d
--gpus
"
$GPU_DEVICES
"
--shm-size
=
10.24gb
-e
HF_TOKEN
\
-v
~/.cache/huggingface:/root/.cache/huggingface
--name
"node
$node
"
\
--network
docker-net
--ip
192.168.10.
$((
10
+
$node
))
--rm
"
$DOCKER_IMAGE
"
\
/bin/bash
-c
"tail -f /dev/null"
# organize containers into a ray cluster
# organize containers into a ray cluster
if
[
$node
-eq
0
]
;
then
if
[
"
$node
"
-eq
0
]
;
then
# start the ray head node
# start the ray head node
docker
exec
-d
node
$node
/bin/bash
-c
"ray start --head --port=6379 --block"
docker
exec
-d
"
node
$node
"
/bin/bash
-c
"ray start --head --port=6379 --block"
# wait for the head node to be ready
# wait for the head node to be ready
sleep
10
sleep
10
else
else
# start the ray worker nodes, and connect them to the head node
# start the ray worker nodes, and connect them to the head node
docker
exec
-d
node
$node
/bin/bash
-c
"ray start --address=192.168.10.10:6379 --block"
docker
exec
-d
"
node
$node
"
/bin/bash
-c
"ray start --address=192.168.10.10:6379 --block"
fi
fi
done
done
...
@@ -79,22 +82,22 @@ run_nodes() {
...
@@ -79,22 +82,22 @@ run_nodes() {
for
node_gpu
in
$(
seq
0
$((
$NUM_GPUS
-
1
))
)
;
do
for
node_gpu
in
$(
seq
0
$((
$NUM_GPUS
-
1
))
)
;
do
DEVICE_NUM
=
$((
$node
*
$NUM_GPUS
+
$node_gpu
))
DEVICE_NUM
=
$((
$node
*
$NUM_GPUS
+
$node_gpu
))
GPU_DEVICES+
=
$((
$DEVICE_NUM
))
GPU_DEVICES+
=
$((
$DEVICE_NUM
))
if
[
$node_gpu
-lt
$((
$NUM_GPUS
-
1
))
]
;
then
if
[
"
$node_gpu
"
-lt
$((
$NUM_GPUS
-
1
))
]
;
then
GPU_DEVICES+
=
','
GPU_DEVICES+
=
','
fi
fi
done
done
GPU_DEVICES+
=
'"'
GPU_DEVICES+
=
'"'
echo
"Running node
$node
with GPU devices:
$GPU_DEVICES
"
echo
"Running node
$node
with GPU devices:
$GPU_DEVICES
"
if
[
$node
-ne
0
]
;
then
if
[
"
$node
"
-ne
0
]
;
then
docker
exec
-d
node
$node
/bin/bash
-c
"cd
$WORKING_DIR
;
${
COMMANDS
[
$node
]
}
"
docker
exec
-d
"
node
$node
"
/bin/bash
-c
"cd
$WORKING_DIR
;
${
COMMANDS
[
$node
]
}
"
else
else
docker
exec
node
$node
/bin/bash
-c
"cd
$WORKING_DIR
;
${
COMMANDS
[
$node
]
}
"
docker
exec
"
node
$node
"
/bin/bash
-c
"cd
$WORKING_DIR
;
${
COMMANDS
[
$node
]
}
"
fi
fi
done
done
}
}
cleanup
()
{
cleanup
()
{
for
node
in
$(
seq
0
$((
$NUM_NODES
-
1
))
)
;
do
for
node
in
$(
seq
0
$((
$NUM_NODES
-
1
))
)
;
do
docker stop node
$node
docker stop
"
node
$node
"
done
done
docker network
rm
docker-net
docker network
rm
docker-net
}
}
...
...
.buildkite/run-neuron-test.sh
View file @
3be5b26a
#!/bin/bash
# This script build the Neuron docker image and run the API server inside the container.
# This script build the Neuron docker image and run the API server inside the container.
# It serves a sanity check for compilation and basic model usage.
# It serves a sanity check for compilation and basic model usage.
set
-e
set
-e
...
@@ -12,10 +14,10 @@ if [ -f /tmp/neuron-docker-build-timestamp ]; then
...
@@ -12,10 +14,10 @@ if [ -f /tmp/neuron-docker-build-timestamp ]; then
current_time
=
$(
date
+%s
)
current_time
=
$(
date
+%s
)
if
[
$((
current_time
-
last_build
))
-gt
86400
]
;
then
if
[
$((
current_time
-
last_build
))
-gt
86400
]
;
then
docker system prune
-f
docker system prune
-f
echo
$current_time
>
/tmp/neuron-docker-build-timestamp
echo
"
$current_time
"
>
/tmp/neuron-docker-build-timestamp
fi
fi
else
else
echo
$(
date
+%s
)
>
/tmp/neuron-docker-build-timestamp
date
"
+%s
"
>
/tmp/neuron-docker-build-timestamp
fi
fi
docker build
-t
neuron
-f
Dockerfile.neuron
.
docker build
-t
neuron
-f
Dockerfile.neuron
.
...
@@ -34,7 +36,7 @@ wait_for_server_to_start() {
...
@@ -34,7 +36,7 @@ wait_for_server_to_start() {
timeout
=
300
timeout
=
300
counter
=
0
counter
=
0
while
[
"
$(
curl
-s
-o
/dev/null
-w
'
'
%
{
http_code
}
'
'
localhost:8000/health
)
"
!=
"200"
]
;
do
while
[
"
$(
curl
-s
-o
/dev/null
-w
'%{http_code}'
localhost:8000/health
)
"
!=
"200"
]
;
do
sleep
1
sleep
1
counter
=
$((
counter
+
1
))
counter
=
$((
counter
+
1
))
if
[
$counter
-ge
$timeout
]
;
then
if
[
$counter
-ge
$timeout
]
;
then
...
...
.buildkite/run-openvino-test.sh
View file @
3be5b26a
#!/bin/bash
# This script build the OpenVINO docker image and run the offline inference inside the container.
# This script build the OpenVINO docker image and run the offline inference inside the container.
# It serves a sanity check for compilation and basic model usage.
# It serves a sanity check for compilation and basic model usage.
set
-ex
set
-ex
...
...
.buildkite/run-tpu-test.sh
View file @
3be5b26a
#!/bin/bash
set
-e
set
-e
# Build the docker image.
# Build the docker image.
...
@@ -12,4 +14,4 @@ remove_docker_container
...
@@ -12,4 +14,4 @@ remove_docker_container
# For HF_TOKEN.
# For HF_TOKEN.
source
/etc/environment
source
/etc/environment
# Run a simple end-to-end example.
# Run a simple end-to-end example.
docker run
--privileged
--net
host
--shm-size
=
16G
-it
-e
HF_TOKEN
=
$HF_TOKEN
--name
tpu-test vllm-tpu /bin/bash
-c
"python3 -m pip install git+https://github.com/thuml/depyf.git && python3 -m pip install pytest && python3 -m pip install lm_eval[api]==0.4.4 && pytest -v -s /workspace/vllm/tests/entrypoints/openai/test_accuracy.py && pytest -v -s /workspace/vllm/tests/tpu/test_custom_dispatcher.py && python3 /workspace/vllm/tests/tpu/test_compilation.py && python3 /workspace/vllm/examples/offline_inference_tpu.py"
docker run
--privileged
--net
host
--shm-size
=
16G
-it
-e
"
HF_TOKEN=
$HF_TOKEN
"
--name
tpu-test vllm-tpu /bin/bash
-c
"python3 -m pip install git+https://github.com/thuml/depyf.git && python3 -m pip install pytest && python3 -m pip install lm_eval[api]==0.4.4 && pytest -v -s /workspace/vllm/tests/entrypoints/openai/test_accuracy.py && pytest -v -s /workspace/vllm/tests/tpu/test_custom_dispatcher.py && python3 /workspace/vllm/tests/tpu/test_compilation.py && python3 /workspace/vllm/examples/offline_inference_tpu.py"
.buildkite/run-xpu-test.sh
View file @
3be5b26a
#!/bin/bash
# This script build the CPU docker image and run the offline inference inside the container.
# This script build the CPU docker image and run the offline inference inside the container.
# It serves a sanity check for compilation and basic model usage.
# It serves a sanity check for compilation and basic model usage.
set
-ex
set
-ex
...
...
.github/workflows/scripts/cuda-install.sh
View file @
3be5b26a
#!/bin/bash
#!/bin/bash
# Replace '.' with '-' ex: 11.8 -> 11-8
# Replace '.' with '-' ex: 11.8 -> 11-8
cuda_version
=
$(
echo
$1
|
tr
"."
"-"
)
cuda_version
=
$(
echo
"
$1
"
|
tr
"."
"-"
)
# Removes '-' and '.' ex: ubuntu-20.04 -> ubuntu2004
# Removes '-' and '.' ex: ubuntu-20.04 -> ubuntu2004
OS
=
$(
echo
$2
|
tr
-d
".
\-
"
)
OS
=
$(
echo
"
$2
"
|
tr
-d
".
\-
"
)
# Installs CUDA
# Installs CUDA
wget
-nv
https://developer.download.nvidia.com/compute/cuda/repos/
${
OS
}
/x86_64/cuda-keyring_1.1-1_all.deb
wget
-nv
"
https://developer.download.nvidia.com/compute/cuda/repos/
${
OS
}
/x86_64/cuda-keyring_1.1-1_all.deb
"
sudo
dpkg
-i
cuda-keyring_1.1-1_all.deb
sudo
dpkg
-i
cuda-keyring_1.1-1_all.deb
rm
cuda-keyring_1.1-1_all.deb
rm
cuda-keyring_1.1-1_all.deb
sudo
apt
-qq
update
sudo
apt
-qq
update
sudo
apt
-y
install
cuda-
${
cuda_version
}
cuda-nvcc-
${
cuda_version
}
cuda-libraries-dev-
${
cuda_version
}
sudo
apt
-y
install
"
cuda-
${
cuda_version
}
"
"
cuda-nvcc-
${
cuda_version
}
"
"
cuda-libraries-dev-
${
cuda_version
}
"
sudo
apt clean
sudo
apt clean
# Test nvcc
# Test nvcc
...
...
.github/workflows/scripts/pytorch-install.sh
View file @
3be5b26a
...
@@ -6,7 +6,7 @@ cuda_version=$3
...
@@ -6,7 +6,7 @@ cuda_version=$3
# Install torch
# Install torch
$python_executable
-m
pip
install
numpy pyyaml scipy ipython mkl mkl-include ninja cython typing pandas typing-extensions dataclasses setuptools
&&
conda clean
-ya
$python_executable
-m
pip
install
numpy pyyaml scipy ipython mkl mkl-include ninja cython typing pandas typing-extensions dataclasses setuptools
&&
conda clean
-ya
$python_executable
-m
pip
install
torch
==
${
pytorch_version
}
+cu
${
cuda_version
//./
}
--extra-index-url
https://download.pytorch.org/whl/cu
${
cuda_version
//./
}
$python_executable
-m
pip
install
torch
==
"
${
pytorch_version
}
+cu
${
cuda_version
//./
}
"
--extra-index-url
"
https://download.pytorch.org/whl/cu
${
cuda_version
//./
}
"
# Print version information
# Print version information
$python_executable
--version
$python_executable
--version
...
...
.github/workflows/shellcheck.yml
0 → 100644
View file @
3be5b26a
name
:
Lint shell scripts
on
:
push
:
branches
:
-
"
main"
paths
:
-
'
**/*.sh'
-
'
.github/workflows/shellcheck.yml'
pull_request
:
branches
:
-
"
main"
paths
:
-
'
**/*.sh'
-
'
.github/workflows/shellcheck.yml'
env
:
LC_ALL
:
en_US.UTF-8
defaults
:
run
:
shell
:
bash
permissions
:
contents
:
read
jobs
:
shellcheck
:
runs-on
:
ubuntu-latest
steps
:
-
name
:
"
Checkout"
uses
:
actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
# v4.1.7
with
:
fetch-depth
:
0
-
name
:
"
Check
shell
scripts"
run
:
|
tools/shellcheck.sh
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment