Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
ecb19523
Unverified
Commit
ecb19523
authored
Nov 27, 2025
by
Fadi Arafeh
Committed by
GitHub
Nov 27, 2025
Browse files
[cpu][fix] Fix Arm CI tests (#29552)
Signed-off-by:
Fadi Arafeh
<
fadi.arafeh@arm.com
>
parent
da8e1a1b
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
12 additions
and
14 deletions
+12
-14
.buildkite/scripts/hardware_ci/run-cpu-test-arm.sh
.buildkite/scripts/hardware_ci/run-cpu-test-arm.sh
+12
-14
No files found.
.buildkite/scripts/hardware_ci/run-cpu-test-arm.sh
View file @
ecb19523
...
@@ -7,53 +7,51 @@ set -ex
...
@@ -7,53 +7,51 @@ set -ex
# allow to bind to different cores
# allow to bind to different cores
CORE_RANGE
=
${
CORE_RANGE
:-
0
-16
}
CORE_RANGE
=
${
CORE_RANGE
:-
0
-16
}
OMP_CORE_RANGE
=
${
OMP_CORE_RANGE
:-
0
-16
}
OMP_CORE_RANGE
=
${
OMP_CORE_RANGE
:-
0
-16
}
NUMA_NODE
=
${
NUMA_NODE
:-
0
}
export
CMAKE_BUILD_PARALLEL_LEVEL
=
32
export
CMAKE_BUILD_PARALLEL_LEVEL
=
16
# Setup cleanup
# Setup cleanup
remove_docker_container
()
{
remove_docker_container
()
{
set
-e
;
set
-e
;
docker
rm
-f
cpu-test
-
"
$NUMA_NODE
"
||
true
;
docker
rm
-f
cpu-test
||
true
;
}
}
trap
remove_docker_container EXIT
trap
remove_docker_container EXIT
remove_docker_container
remove_docker_container
# Try building the docker image
# Try building the docker image
numactl
-C
"
$CORE_RANGE
"
-N
"
$NUMA_NODE
"
docker build
--tag
cpu-test
-
"
$NUMA_NODE
"
--target
vllm-test
-f
docker/Dockerfile.cpu
.
docker build
--tag
cpu-test
--target
vllm-test
-f
docker/Dockerfile.cpu
.
# Run the image
, setting --shm-size=4g for tensor parallel.
# Run the image
docker run
-itd
--cpuset-cpus
=
"
$CORE_RANGE
"
--cpuset-mems
=
"
$NUMA_NODE
"
--entrypoint
/bin/bash
-v
~/.cache/huggingface:/root/.cache/huggingface
--privileged
=
true
-e
HF_TOKEN
--env
VLLM_CPU_KVCACHE_SPACE
=
16
--env
VLLM_CPU_CI_ENV
=
1
-e
E2E_OMP_THREADS
=
"
$OMP_CORE_RANGE
"
--shm-size
=
4g
--name
cpu-test
-
"
$NUMA_NODE
"
cpu-test-
"
$NUMA_NODE
"
docker run
-itd
--cpuset-cpus
=
"
$CORE_RANGE
"
--entrypoint
/bin/bash
-v
~/.cache/huggingface:/root/.cache/huggingface
-e
HF_TOKEN
--env
VLLM_CPU_KVCACHE_SPACE
=
16
--env
VLLM_CPU_CI_ENV
=
1
-e
E2E_OMP_THREADS
=
"
$OMP_CORE_RANGE
"
--shm-size
=
4g
--name
cpu-test
cpu-test
function
cpu_tests
()
{
function
cpu_tests
()
{
set
-e
set
-e
export
NUMA_NODE
=
$2
docker
exec
cpu-test
-
"
$NUMA_NODE
"
bash
-c
"
docker
exec
cpu-test bash
-c
"
set -e
set -e
pip list"
pip list"
# offline inference
# offline inference
docker
exec
cpu-test
-
"
$NUMA_NODE
"
bash
-c
"
docker
exec
cpu-test bash
-c
"
set -e
set -e
python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m"
python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m"
# Run kernel tests
# Run kernel tests
docker
exec
cpu-test
-
"
$NUMA_NODE
"
bash
-c
"
docker
exec
cpu-test bash
-c
"
set -e
set -e
pytest -x -v -s tests/kernels/test_onednn.py
pytest -x -v -s tests/kernels/test_onednn.py
pytest -x -v -s tests/kernels/attention/test_cpu_attn.py"
pytest -x -v -s tests/kernels/attention/test_cpu_attn.py"
# basic online serving
# basic online serving
docker
exec
cpu-test
-
"
$NUMA_NODE
"
bash
-c
'
docker
exec
cpu-test bash
-c
'
set -e
set -e
VLLM_CPU_OMP_THREADS_BIND=$E2E_OMP_THREADS vllm serve
meta-llama/Llama-3.2-3B-Instruct
--max-model-len 2048 &
VLLM_CPU_OMP_THREADS_BIND=$E2E_OMP_THREADS vllm serve
Qwen/Qwen3-0.6B
--max-model-len 2048 &
server_pid=$!
server_pid=$!
timeout 600 bash -c "until curl localhost:8000/v1/models; do sleep 1; done" || exit 1
timeout 600 bash -c "until curl localhost:8000/v1/models; do sleep 1; done" || exit 1
vllm bench serve \
vllm bench serve \
--backend vllm \
--backend vllm \
--dataset-name random \
--dataset-name random \
--model
meta-llama/Llama-3.2-3B-Instruct
\
--model
Qwen/Qwen3-0.6B
\
--num-prompts 20 \
--num-prompts 20 \
--endpoint /v1/completions
--endpoint /v1/completions
kill -s SIGTERM $server_pid &'
kill -s SIGTERM $server_pid &'
...
@@ -61,4 +59,4 @@ function cpu_tests() {
...
@@ -61,4 +59,4 @@ function cpu_tests() {
# All of CPU tests are expected to be finished less than 40 mins.
# All of CPU tests are expected to be finished less than 40 mins.
export
-f
cpu_tests
export
-f
cpu_tests
timeout
2h bash
-c
"
cpu_tests
$CORE_RANGE
$NUMA_NODE
"
timeout
2h bash
-c
cpu_tests
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment