Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
8655f47f
Unverified
Commit
8655f47f
authored
Jun 03, 2025
by
Li, Jiang
Committed by
GitHub
Jun 02, 2025
Browse files
[CPU][CI] Re-enable the CPU CI tests (#19046)
Signed-off-by:
jiang.li
<
jiang1.li@intel.com
>
parent
4ce42f92
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
29 additions
and
26 deletions
+29
-26
.buildkite/scripts/hardware_ci/run-cpu-test.sh
.buildkite/scripts/hardware_ci/run-cpu-test.sh
+20
-22
docker/Dockerfile.cpu
docker/Dockerfile.cpu
+7
-3
vllm/distributed/parallel_state.py
vllm/distributed/parallel_state.py
+2
-1
No files found.
.buildkite/scripts/hardware_ci/run-cpu-test.sh
View file @
8655f47f
...
@@ -8,67 +8,65 @@ set -ex
...
@@ -8,67 +8,65 @@ set -ex
CORE_RANGE
=
${
CORE_RANGE
:-
48
-95
}
CORE_RANGE
=
${
CORE_RANGE
:-
48
-95
}
NUMA_NODE
=
${
NUMA_NODE
:-
1
}
NUMA_NODE
=
${
NUMA_NODE
:-
1
}
export
CMAKE_BUILD_PARALLEL_LEVEL
=
32
# Setup cleanup
# Setup cleanup
remove_docker_container
()
{
remove_docker_container
()
{
set
-e
;
set
-e
;
docker
rm
-f
cpu-test-
"
$BUILDKITE_BUILD_NUMBER
"
-
"
$NUMA_NODE
"
cpu-test-
"
$BUILDKITE_BUILD_NUMBER
"
-avx2-
"
$NUMA_NODE
"
||
true
;
docker
rm
-f
cpu-test-
"
$NUMA_NODE
"
cpu-test-
"
$NUMA_NODE
"
-avx2
||
true
;
docker image
rm
cpu-test-
"
$BUILDKITE_BUILD_NUMBER
"
cpu-test-
"
$BUILDKITE_BUILD_NUMBER
"
-avx2
||
true
;
}
}
trap
remove_docker_container EXIT
trap
remove_docker_container EXIT
remove_docker_container
remove_docker_container
# Try building the docker image
# Try building the docker image
numactl
-C
"
$CORE_RANGE
"
-N
"
$NUMA_NODE
"
docker build
--tag
cpu-test-
"
$
BUILDKITE_BUILD_NUMBER
"
--target
vllm-test
-f
docker/Dockerfile.cpu
.
numactl
-C
"
$CORE_RANGE
"
-N
"
$NUMA_NODE
"
docker build
--tag
cpu-test-
"
$
NUMA_NODE
"
--target
vllm-test
-f
docker/Dockerfile.cpu
.
numactl
-C
"
$CORE_RANGE
"
-N
"
$NUMA_NODE
"
docker build
--build-arg
VLLM_CPU_DISABLE_AVX512
=
"true"
--tag
cpu-test-
"
$
BUILDKITE_BUILD_NUMBER
"
-avx2
--target
vllm-test
-f
docker/Dockerfile.cpu
.
numactl
-C
"
$CORE_RANGE
"
-N
"
$NUMA_NODE
"
docker build
--build-arg
VLLM_CPU_DISABLE_AVX512
=
"true"
--tag
cpu-test-
"
$
NUMA_NODE
"
-avx2
--target
vllm-test
-f
docker/Dockerfile.cpu
.
# Run the image, setting --shm-size=4g for tensor parallel.
# Run the image, setting --shm-size=4g for tensor parallel.
docker run
-itd
--entrypoint
/bin/bash
-v
~/.cache/huggingface:/root/.cache/huggingface
--cpuset-cpus
=
"
$CORE_RANGE
"
\
docker run
-itd
--entrypoint
/bin/bash
-v
~/.cache/huggingface:/root/.cache/huggingface
--cpuset-cpus
=
"
$CORE_RANGE
"
\
--cpuset-mems
=
"
$NUMA_NODE
"
--privileged
=
true
-e
HF_TOKEN
--env
VLLM_CPU_KVCACHE_SPACE
=
4
--shm-size
=
4g
--name
cpu-test-
"
$
BUILDKITE_BUILD_NUMBER
"
-
"
$NUMA_NODE
"
cpu-test-
"
$BUILDKITE_BUILD_NUMBER
"
--cpuset-mems
=
"
$NUMA_NODE
"
--privileged
=
true
-e
HF_TOKEN
--env
VLLM_CPU_KVCACHE_SPACE
=
4
--shm-size
=
4g
--name
cpu-test-
"
$
NUMA_NODE
"
cpu-test-
"
$NUMA_NODE
"
docker run
-itd
--entrypoint
/bin/bash
-v
~/.cache/huggingface:/root/.cache/huggingface
--cpuset-cpus
=
"
$CORE_RANGE
"
\
docker run
-itd
--entrypoint
/bin/bash
-v
~/.cache/huggingface:/root/.cache/huggingface
--cpuset-cpus
=
"
$CORE_RANGE
"
\
--cpuset-mems
=
"
$NUMA_NODE
"
--privileged
=
true
-e
HF_TOKEN
--env
VLLM_CPU_KVCACHE_SPACE
=
4
--shm-size
=
4g
--name
cpu-test-
"
$
BUILDKITE_BUILD_NUMBER
"
-avx2-
"
$NUMA_NODE
"
cpu-test-
"
$BUILDKITE_BUILD_NUMBER
"
-avx2
--cpuset-mems
=
"
$NUMA_NODE
"
--privileged
=
true
-e
HF_TOKEN
--env
VLLM_CPU_KVCACHE_SPACE
=
4
--shm-size
=
4g
--name
cpu-test-
"
$
NUMA_NODE
"
-avx2
cpu-test-
"
$NUMA_NODE
"
-avx2
function
cpu_tests
()
{
function
cpu_tests
()
{
set
-e
set
-e
export
NUMA_NODE
=
$2
export
NUMA_NODE
=
$2
export
BUILDKITE_BUILD_NUMBER
=
$3
# offline inference
# offline inference
docker
exec
cpu-test-
"
$
BUILDKITE_BUILD_NUMBER
"
-avx2-
"
$
NUMA_NODE
"
bash
-c
"
docker
exec
cpu-test-
"
$NUMA_NODE
"
-avx2
bash
-c
"
set -e
set -e
python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m"
python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m"
# Run basic model test
# Run basic model test
docker
exec
cpu-test-
"
$
BUILDKITE_BUILD_NUMBER
"
-
"
$
NUMA_NODE
"
bash
-c
"
docker
exec
cpu-test-
"
$NUMA_NODE
"
bash
-c
"
set -e
set -e
pytest -v -s tests/kernels/test_cache.py -m cpu_model
pytest -v -s tests/kernels/attention/test_cache.py -m cpu_model
pytest -v -s tests/kernels/test_mla_decode_cpu.py -m cpu_model
pytest -v -s tests/kernels/attention/test_mla_decode_cpu.py -m cpu_model
pytest -v -s tests/models/decoder_only/language -m cpu_model
pytest -v -s tests/models/language/generation -m cpu_model
pytest -v -s tests/models/embedding/language -m cpu_model
pytest -v -s tests/models/language/pooling -m cpu_model
pytest -v -s tests/models/encoder_decoder/language -m cpu_model
pytest -v -s tests/models/multimodal/generation --ignore=tests/models/multimodal/generation/test_mllama.py -m cpu_model"
pytest -v -s tests/models/decoder_only/audio_language -m cpu_model
pytest -v -s tests/models/decoder_only/vision_language -m cpu_model"
# Run compressed-tensor test
# Run compressed-tensor test
docker
exec
cpu-test-
"
$
BUILDKITE_BUILD_NUMBER
"
-
"
$
NUMA_NODE
"
bash
-c
"
docker
exec
cpu-test-
"
$NUMA_NODE
"
bash
-c
"
set -e
set -e
pytest -s -v
\
pytest -s -v
\
tests/quantization/test_compressed_tensors.py::test_compressed_tensors_w8a8_static_setup
\
tests/quantization/test_compressed_tensors.py::test_compressed_tensors_w8a8_static_setup
\
tests/quantization/test_compressed_tensors.py::test_compressed_tensors_w8a8_dynamic_per_token"
tests/quantization/test_compressed_tensors.py::test_compressed_tensors_w8a8_dynamic_per_token"
# Run AWQ test
# Run AWQ test
docker
exec
cpu-test-
"
$
BUILDKITE_BUILD_NUMBER
"
-
"
$
NUMA_NODE
"
bash
-c
"
docker
exec
cpu-test-
"
$NUMA_NODE
"
bash
-c
"
set -e
set -e
pytest -s -v
\
pytest -s -v
\
tests/quantization/test_ipex_quant.py"
tests/quantization/test_ipex_quant.py"
# Run chunked-prefill and prefix-cache test
# Run chunked-prefill and prefix-cache test
docker
exec
cpu-test-
"
$
BUILDKITE_BUILD_NUMBER
"
-
"
$
NUMA_NODE
"
bash
-c
"
docker
exec
cpu-test-
"
$NUMA_NODE
"
bash
-c
"
set -e
set -e
pytest -s -v -k cpu_model
\
pytest -s -v -k cpu_model
\
tests/basic_correctness/test_chunked_prefill.py"
tests/basic_correctness/test_chunked_prefill.py"
# online serving
# online serving
docker
exec
cpu-test-
"
$
BUILDKITE_BUILD_NUMBER
"
-
"
$
NUMA_NODE
"
bash
-c
"
docker
exec
cpu-test-
"
$NUMA_NODE
"
bash
-c
"
set -e
set -e
export VLLM_CPU_KVCACHE_SPACE=10
export VLLM_CPU_KVCACHE_SPACE=10
export VLLM_CPU_OMP_THREADS_BIND=
$1
export VLLM_CPU_OMP_THREADS_BIND=
$1
...
@@ -83,7 +81,7 @@ function cpu_tests() {
...
@@ -83,7 +81,7 @@ function cpu_tests() {
--tokenizer facebook/opt-125m"
--tokenizer facebook/opt-125m"
# Run multi-lora tests
# Run multi-lora tests
docker
exec
cpu-test-
"
$
BUILDKITE_BUILD_NUMBER
"
-
"
$
NUMA_NODE
"
bash
-c
"
docker
exec
cpu-test-
"
$NUMA_NODE
"
bash
-c
"
set -e
set -e
pytest -s -v
\
pytest -s -v
\
tests/lora/test_qwen2vl.py"
tests/lora/test_qwen2vl.py"
...
@@ -91,4 +89,4 @@ function cpu_tests() {
...
@@ -91,4 +89,4 @@ function cpu_tests() {
# All of CPU tests are expected to be finished less than 40 mins.
# All of CPU tests are expected to be finished less than 40 mins.
export
-f
cpu_tests
export
-f
cpu_tests
timeout
40m bash
-c
"cpu_tests
$CORE_RANGE
$NUMA_NODE
$BUILDKITE_BUILD_NUMBER
"
timeout
40m bash
-c
"cpu_tests
$CORE_RANGE
$NUMA_NODE
"
docker/Dockerfile.cpu
View file @
8655f47f
...
@@ -75,6 +75,7 @@ RUN --mount=type=bind,source=.git,target=.git \
...
@@ -75,6 +75,7 @@ RUN --mount=type=bind,source=.git,target=.git \
RUN --mount=type=cache,target=/root/.cache/uv \
RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=cache,target=/root/.cache/ccache \
--mount=type=cache,target=/root/.cache/ccache \
--mount=type=cache,target=/workspace/vllm/.deps,sharing=locked \
--mount=type=bind,source=.git,target=.git \
--mount=type=bind,source=.git,target=.git \
VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel
VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel
...
@@ -85,7 +86,7 @@ WORKDIR /workspace/vllm
...
@@ -85,7 +86,7 @@ WORKDIR /workspace/vllm
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/var/lib/apt,sharing=locked \
--mount=type=cache,target=/var/lib/apt,sharing=locked \
apt-get install -y --no-install-recommends vim numactl
apt-get install -y --no-install-recommends vim numactl
xz-utils
# install development dependencies (for testing)
# install development dependencies (for testing)
RUN --mount=type=cache,target=/root/.cache/uv \
RUN --mount=type=cache,target=/root/.cache/uv \
...
@@ -108,8 +109,11 @@ FROM base AS vllm-test
...
@@ -108,8 +109,11 @@ FROM base AS vllm-test
WORKDIR /workspace/
WORKDIR /workspace/
RUN --mount=type=cache,target=/root/.cache/uv \
RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,src=requirements/test.txt,target=requirements/test.txt \
--mount=type=bind,src=requirements/test.in,target=requirements/test.in \
uv pip install -r requirements/test.txt
cp requirements/test.in requirements/test-cpu.in && \
sed -i '/mamba_ssm/d' requirements/test-cpu.in && \
uv pip compile requirements/test-cpu.in -o requirements/cpu-test.txt && \
uv pip install -r requirements/cpu-test.txt
RUN --mount=type=cache,target=/root/.cache/uv \
RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,from=vllm-build,src=/workspace/vllm/dist,target=dist \
--mount=type=bind,from=vllm-build,src=/workspace/vllm/dist,target=dist \
...
...
vllm/distributed/parallel_state.py
View file @
8655f47f
...
@@ -1203,7 +1203,8 @@ def cleanup_dist_env_and_memory(shutdown_ray: bool = False):
...
@@ -1203,7 +1203,8 @@ def cleanup_dist_env_and_memory(shutdown_ray: bool = False):
if
empty_cache
is
not
None
:
if
empty_cache
is
not
None
:
empty_cache
()
empty_cache
()
try
:
try
:
torch
.
_C
.
_host_emptyCache
()
if
not
current_platform
.
is_cpu
():
torch
.
_C
.
_host_emptyCache
()
except
AttributeError
:
except
AttributeError
:
logger
.
warning
(
logger
.
warning
(
"torch._C._host_emptyCache() only available in Pytorch >=2.5"
)
"torch._C._host_emptyCache() only available in Pytorch >=2.5"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment