Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
280d0741
Unverified
Commit
280d0741
authored
Mar 28, 2025
by
Li, Jiang
Committed by
GitHub
Mar 28, 2025
Browse files
[CPU][CI] Improve CPU Dockerfile (#15690)
Signed-off-by:
jiang1.li
<
jiang1.li@intel.com
>
parent
32b14baf
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
146 additions
and
54 deletions
+146
-54
.buildkite/release-pipeline.yaml
.buildkite/release-pipeline.yaml
+1
-1
.buildkite/run-cpu-test.sh
.buildkite/run-cpu-test.sh
+9
-7
Dockerfile.cpu
Dockerfile.cpu
+107
-38
docs/source/getting_started/installation/cpu.md
docs/source/getting_started/installation/cpu.md
+27
-8
docs/source/getting_started/installation/cpu/x86.inc.md
docs/source/getting_started/installation/cpu/x86.inc.md
+2
-0
No files found.
.buildkite/release-pipeline.yaml
View file @
280d0741
...
...
@@ -82,7 +82,7 @@ steps:
queue
:
cpu_queue_postmerge
commands
:
-
"
aws
ecr-public
get-login-password
--region
us-east-1
|
docker
login
--username
AWS
--password-stdin
public.ecr.aws/q9t5s3a7"
-
"
DOCKER_BUILDKIT=1
docker
build
--build-arg
max_jobs=16
--build-arg
GIT_REPO_CHECK=1
--tag
public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent
meta-data
get
release-version)
--tag
public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest
--progress
plain
-f
Dockerfile.cpu
."
-
"
DOCKER_BUILDKIT=1
docker
build
--build-arg
max_jobs=16
--build-arg
GIT_REPO_CHECK=1
--tag
public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent
meta-data
get
release-version)
--tag
public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest
--progress
plain
--target
vllm-openai
-f
Dockerfile.cpu
."
-
"
docker
push
public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent
meta-data
get
release-version)"
env
:
DOCKER_BUILDKIT
:
"
1"
.buildkite/run-cpu-test.sh
View file @
280d0741
...
...
@@ -8,15 +8,19 @@ set -ex
CORE_RANGE
=
${
CORE_RANGE
:-
48
-95
}
NUMA_NODE
=
${
NUMA_NODE
:-
1
}
# Try building the docker image
numactl
-C
"
$CORE_RANGE
"
-N
"
$NUMA_NODE
"
docker build
-t
cpu-test-
"
$BUILDKITE_BUILD_NUMBER
"
-f
Dockerfile.cpu
.
numactl
-C
"
$CORE_RANGE
"
-N
"
$NUMA_NODE
"
docker build
--build-arg
VLLM_CPU_DISABLE_AVX512
=
"true"
-t
cpu-test-
"
$BUILDKITE_BUILD_NUMBER
"
-avx2
-f
Dockerfile.cpu
.
# Setup cleanup
remove_docker_container
()
{
set
-e
;
docker
rm
-f
cpu-test-
"
$BUILDKITE_BUILD_NUMBER
"
-
"
$NUMA_NODE
"
cpu-test-
"
$BUILDKITE_BUILD_NUMBER
"
-avx2-
"
$NUMA_NODE
"
||
true
;
}
remove_docker_container
()
{
set
-e
;
docker
rm
-f
cpu-test-
"
$BUILDKITE_BUILD_NUMBER
"
-
"
$NUMA_NODE
"
cpu-test-
"
$BUILDKITE_BUILD_NUMBER
"
-avx2-
"
$NUMA_NODE
"
||
true
;
docker image
rm
cpu-test-
"
$BUILDKITE_BUILD_NUMBER
"
cpu-test-
"
$BUILDKITE_BUILD_NUMBER
"
-avx2
||
true
;
}
trap
remove_docker_container EXIT
remove_docker_container
# Try building the docker image
numactl
-C
"
$CORE_RANGE
"
-N
"
$NUMA_NODE
"
docker build
--tag
cpu-test-
"
$BUILDKITE_BUILD_NUMBER
"
--target
vllm-test
-f
Dockerfile.cpu
.
numactl
-C
"
$CORE_RANGE
"
-N
"
$NUMA_NODE
"
docker build
--build-arg
VLLM_CPU_DISABLE_AVX512
=
"true"
--tag
cpu-test-
"
$BUILDKITE_BUILD_NUMBER
"
-avx2
--target
vllm-test
-f
Dockerfile.cpu
.
# Run the image, setting --shm-size=4g for tensor parallel.
docker run
-itd
--entrypoint
/bin/bash
-v
~/.cache/huggingface:/root/.cache/huggingface
--cpuset-cpus
=
"
$CORE_RANGE
"
\
--cpuset-mems
=
"
$NUMA_NODE
"
--privileged
=
true
-e
HF_TOKEN
--env
VLLM_CPU_KVCACHE_SPACE
=
4
--shm-size
=
4g
--name
cpu-test-
"
$BUILDKITE_BUILD_NUMBER
"
-
"
$NUMA_NODE
"
cpu-test-
"
$BUILDKITE_BUILD_NUMBER
"
...
...
@@ -36,8 +40,6 @@ function cpu_tests() {
# Run basic model test
docker
exec
cpu-test-
"
$BUILDKITE_BUILD_NUMBER
"
-
"
$NUMA_NODE
"
bash
-c
"
set -e
pip install -r vllm/requirements/test.txt
pip install -r vllm/requirements/cpu.txt
pytest -v -s tests/kernels/test_cache.py -m cpu_model
pytest -v -s tests/kernels/test_mla_decode_cpu.py -m cpu_model
pytest -v -s tests/models/decoder_only/language -m cpu_model
...
...
Dockerfile.cpu
View file @
280d0741
# This vLLM Dockerfile is used to construct an image that can build and run vLLM on the x86 CPU platform.
#
# Build targets:
# vllm-openai (default): used for serving deployment
# vllm-test: used for CI tests
# vllm-dev: used for development
#
# Build arguments:
# PYTHON_VERSION=3.12 (default)|3.11|3.10|3.9
# VLLM_CPU_DISABLE_AVX512=false (default)|true
#
######################### BASE IMAGE #########################
FROM ubuntu:22.04 AS base
FROM ubuntu:22.04 AS cpu-test-1
WORKDIR /workspace/
ENV CCACHE_DIR=/root/.cache/ccache
ARG PYTHON_VERSION=3.12
ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
# Install minimal dependencies and uv
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/var/lib/apt,sharing=locked \
apt-get update -y \
&& apt-get install -y --no-install-recommends ccache git curl wget ca-certificates \
gcc-12 g++-12 libtcmalloc-minimal4 libnuma-dev ffmpeg libsm6 libxext6 libgl1 \
&& update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12 \
&& curl -LsSf https://astral.sh/uv/install.sh | sh
ENV CCACHE_DIR=/root/.cache/ccache
ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache
RUN --mount=type=cache,target=/var/cache/apt \
apt-get update -y \
&& apt-get install -y curl ccache git wget vim numactl gcc-12 g++-12 python3 python3-pip libtcmalloc-minimal4 libnuma-dev \
&& apt-get install -y ffmpeg libsm6 libxext6 libgl1 \
&& update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12
ENV PATH="/root/.local/bin:$PATH"
ENV VIRTUAL_ENV="/opt/venv"
RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV}
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
# https://intel.github.io/intel-extension-for-pytorch/cpu/latest/tutorials/performance_tuning/tuning_guide.html
# intel-openmp provides additional performance improvement vs. openmp
# tcmalloc provides better memory allocation efficiency, e.g., holding memory in caches to speed up access to commonly-used objects.
RUN --mount=type=cache,target=/root/.cache/pip \
pip install intel-openmp==2025.0.1
ENV UV_HTTP_TIMEOUT=500
ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/usr/local/lib/libiomp5.so"
# Install Python dependencies
ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
ENV UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
ENV UV_INDEX_STRATEGY="unsafe-best-match"
ENV UV_LINK_MODE="copy"
RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,src=requirements/common.txt,target=requirements/common.txt \
--mount=type=bind,src=requirements/cpu.txt,target=requirements/cpu.txt \
uv pip install --upgrade pip && \
uv pip install -r requirements/cpu.txt
RUN echo 'ulimit -c 0' >> ~/.bashrc
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install intel-openmp==2024.2.1 intel_extension_for_pytorch==2.6.0
RUN pip install intel_extension_for_pytorch==2.6.0
ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/opt/venv/lib/libiomp5.so:$LD_PRELOAD"
WORKDIR /workspace
RUN echo 'ulimit -c 0' >> ~/.bashrc
ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
RUN --mount=type=cache,target=/root/.cache/pip \
--mount=type=bind,src=requirements/build.txt,target=requirements/build.txt \
pip install --upgrade pip && \
pip install -r requirements/build.txt
######################### BUILD IMAGE #########################
FROM base AS vllm-build
FROM cpu-test-1 AS build
ARG GIT_REPO_CHECK=0
# Support for building with non-AVX512 vLLM: docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" ...
ARG VLLM_CPU_DISABLE_AVX512
ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512}
WORKDIR /workspace/vllm
RUN --mount=type=cache,target=/root/.cache/pip \
--mount=type=bind,src=requirements/common.txt,target=requirements/common.txt \
--mount=type=bind,src=requirements/cpu.txt,target=requirements/cpu.txt \
pip install -v -r requirements/cpu.txt
RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,src=requirements/build.txt,target=requirements/build.txt \
uv pip install -r requirements/build.txt
COPY . .
ARG GIT_REPO_CHECK=0
RUN --mount=type=bind,source=.git,target=.git \
if [ "$GIT_REPO_CHECK" != 0 ]; then bash tools/check_repo.sh ; fi
# Support for building with non-AVX512 vLLM: docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" ...
ARG VLLM_CPU_DISABLE_AVX512
ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512}
RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=cache,target=/root/.cache/ccache \
--mount=type=bind,source=.git,target=.git \
VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel
######################### DEV IMAGE #########################
FROM vllm-build AS vllm-dev
WORKDIR /workspace/vllm
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/var/lib/apt,sharing=locked \
apt-get install -y --no-install-recommends vim numactl
# install development dependencies (for testing)
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install -e tests/vllm_test_utils
RUN --mount=type=cache,target=/root/.cache/
pip
\
RUN --mount=type=cache,target=/root/.cache/
uv
\
--mount=type=cache,target=/root/.cache/ccache \
--mount=type=bind,source=.git,target=.git \
VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel && \
pip install dist/*.whl && \
rm -rf dist
VLLM_TARGET_DEVICE=cpu python3 setup.py develop
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install -r requirements/dev.txt && \
pre-commit install --hook-type pre-commit --hook-type commit-msg
ENTRYPOINT ["bash"]
######################### TEST IMAGE #########################
FROM base AS vllm-test
WORKDIR /workspace/
RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,src=requirements/test.txt,target=requirements/test.txt \
uv pip install -r requirements/test.txt
RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,from=vllm-build,src=/workspace/vllm/dist,target=dist \
uv pip install dist/*.whl
ADD ./tests/ ./tests/
ADD ./examples/ ./examples/
ADD ./benchmarks/ ./benchmarks/
# install development dependencies (for testing)
RUN --mount=type=cache,target=/root/.cache/pip \
pip install -e tests/vllm_test_utils
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install -e tests/vllm_test_utils
ENTRYPOINT ["bash"]
######################### RELEASE IMAGE #########################
FROM base AS vllm-openai
WORKDIR /workspace/
RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=cache,target=/root/.cache/ccache \
--mount=type=bind,from=vllm-build,src=/workspace/vllm/dist,target=dist \
uv pip install dist/*.whl
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
docs/source/getting_started/installation/cpu.md
View file @
280d0741
...
...
@@ -159,18 +159,37 @@ Currently, there are no pre-built CPU wheels.
### Pre-built images
Currently, there are no pre-built CPU images.
:::::{tab-set}
:sync-group: device
::::{tab-item} Intel/AMD x86
:sync: x86
:::{include} cpu/x86.inc.md
:start-after: "### Pre-built images"
:end-before: "### Build image from source"
:::
::::
:::::
### Build image from source
```
console
$
docker build
-f
Dockerfile.cpu
-t
vllm-cpu-env
--shm-size
=
4g
.
$
docker run
-it
\
--rm
\
--network
=
host
\
--cpuset-cpus
=
<cpu-id-list, optional>
\
--cpuset-mems
=
<memory-node, optional>
\
vllm-cpu-env
$
docker build
-f
Dockerfile.cpu
--tag
vllm-cpu-env
--target
vllm-openai
.
#
Launching OpenAI server
$
docker run
--rm
\
--privileged
=
true
\
--shm-size
=
4g
\
-p
8000:8000
\
-e
VLLM_CPU_KVCACHE_SPACE
=
<KV cache space>
\
-e
VLLM_CPU_OMP_THREADS_BIND
=
<CPU cores
for
inference>
\
vllm-cpu-env
\
--model
=
meta-llama/Llama-3.2-1B-Instruct
\
--dtype
=
bfloat16
\
other vLLM OpenAI server arguments
```
::::{tip}
...
...
docs/source/getting_started/installation/cpu/x86.inc.md
View file @
280d0741
...
...
@@ -34,6 +34,8 @@ There are no pre-built wheels or images for this device, so you must build vLLM
### Pre-built images
See [https://gallery.ecr.aws/q9t5s3a7/vllm-cpu-release-repo](https://gallery.ecr.aws/q9t5s3a7/vllm-cpu-release-repo)
### Build image from source
## Extra information
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment