# This vLLM Dockerfile is used to build images that can run vLLM on both x86_64 and arm64 CPU platforms.
#
# Supported platforms:
#   - linux/amd64 (x86_64)
#   - linux/arm64 (aarch64)
#
# Use the `--platform` option with `docker buildx build` to specify the target architecture, e.g.:
#   docker buildx build --platform=linux/arm64 -f docker/Dockerfile.cpu .
#
# Build targets:
#   vllm-openai (default): used for serving deployment
#   vllm-test: used for CI tests
#   vllm-dev: used for development
#
# Build arguments:
#   PYTHON_VERSION=3.13|3.12 (default)|3.11|3.10
#   VLLM_CPU_DISABLE_AVX512=false (default)|true
#   VLLM_CPU_AVX2=false (default)|true (for cross-compilation)
#   VLLM_CPU_AVX512=false (default)|true (for cross-compilation)
#   VLLM_CPU_AVX512BF16=false (default)|true (for cross-compilation)
#   VLLM_CPU_AVX512VNNI=false (default)|true (for cross-compilation)
#   VLLM_CPU_AMXBF16=true (default)|false (for cross-compilation)
#

######################### COMMON BASE IMAGE #########################
# Architecture-independent foundation for every later stage: OS packages, the
# gcc-12 toolchain, uv, a Python virtual environment and the CPU requirements.
FROM ubuntu:22.04 AS base-common

WORKDIR /workspace

ARG PYTHON_VERSION=3.12
ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"

# Install minimal dependencies and uv.
# The uv installer is downloaded to a file and then executed rather than piped
# into `sh`: with a pipe the step's exit status is that of `sh`, so a failed
# download would go unnoticed here.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    apt-get update -y \
    && apt-get install -y --no-install-recommends sudo ccache git curl wget ca-certificates \
    gcc-12 g++-12 libtcmalloc-minimal4 libnuma-dev ffmpeg libsm6 libxext6 libgl1 jq lsof \
    && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12 \
    && curl -LsSf https://astral.sh/uv/install.sh -o /tmp/uv-install.sh \
    && sh /tmp/uv-install.sh \
    && rm -f /tmp/uv-install.sh

# Compile with gcc-12/g++-12 and route C++ compiler invocations through ccache.
ENV CC=/usr/bin/gcc-12 CXX=/usr/bin/g++-12
ENV CCACHE_DIR=/root/.cache/ccache
ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache

# /root/.local/bin must be on PATH before `uv` is invoked below.
ENV PATH="/root/.local/bin:$PATH"
ENV VIRTUAL_ENV="/opt/venv"
ENV UV_PYTHON_INSTALL_DIR=/opt/uv/python
# Create the virtual environment with the requested Python version and put its
# bin directory first on PATH for all following instructions and stages.
RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV}
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

ENV UV_HTTP_TIMEOUT=500

# Install Python dependencies
# Both pip and uv are pointed at the same extra package index.
ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
ENV UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
ENV UV_INDEX_STRATEGY="unsafe-best-match"
ENV UV_LINK_MODE="copy"

# Copy requirements files for installation
COPY requirements/common.txt requirements/common.txt
COPY requirements/cpu.txt requirements/cpu.txt

RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --upgrade pip && \
    uv pip install -r requirements/cpu.txt

# Persist the target architecture as an environment variable in the image.
ARG TARGETARCH
ENV TARGETARCH=${TARGETARCH}

######################### x86_64 BASE IMAGE #########################
FROM base-common AS base-amd64

# Preload tcmalloc and the libiomp5.so located under the /opt/venv virtual environment.
# NOTE(review): libiomp5.so is presumably installed by a package listed in
# requirements/cpu.txt - confirm.
ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/opt/venv/lib/libiomp5.so"

######################### arm64 BASE IMAGE #########################
FROM base-common AS base-arm64

# Preload tcmalloc from the aarch64 multiarch library directory.
ENV LD_PRELOAD="/usr/lib/aarch64-linux-gnu/libtcmalloc_minimal.so.4"

######################### BASE IMAGE #########################
# Pick the architecture-specific stage (base-amd64 or base-arm64) by name,
# using the TARGETARCH value of the current build.
FROM base-${TARGETARCH} AS base

# Set the core-file size limit to 0 for shells that read ~/.bashrc.
RUN echo 'ulimit -c 0' >> ~/.bashrc

######################### BUILD IMAGE #########################
# Builds the vLLM CPU wheel into /vllm-workspace/dist.
FROM base AS vllm-build

# Exported as MAX_JOBS for the build.
# NOTE(review): presumably caps the number of parallel compile jobs - confirm in setup.py.
ARG max_jobs=32
ENV MAX_JOBS=${max_jobs}

# Set to any value other than 0 to run tools/check_repo.sh before building.
ARG GIT_REPO_CHECK=0
# Support for building with non-AVX512 vLLM: docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" ...
ARG VLLM_CPU_DISABLE_AVX512=0
ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512}
# Support for cross-compilation with AVX2 ISA: docker build --build-arg VLLM_CPU_AVX2="1" ...
ARG VLLM_CPU_AVX2=0
ENV VLLM_CPU_AVX2=${VLLM_CPU_AVX2}
# Support for cross-compilation with AVX512 ISA: docker build --build-arg VLLM_CPU_AVX512="1" ...
ARG VLLM_CPU_AVX512=0
ENV VLLM_CPU_AVX512=${VLLM_CPU_AVX512}
# Support for building with AVX512BF16 ISA: docker build --build-arg VLLM_CPU_AVX512BF16="true" ...
ARG VLLM_CPU_AVX512BF16=0
ENV VLLM_CPU_AVX512BF16=${VLLM_CPU_AVX512BF16}
# Support for building with AVX512VNNI ISA: docker build --build-arg VLLM_CPU_AVX512VNNI="true" ...
ARG VLLM_CPU_AVX512VNNI=0
ENV VLLM_CPU_AVX512VNNI=${VLLM_CPU_AVX512VNNI}
# Support for building with AMXBF16 ISA. Unlike the flags above, this one
# defaults to 1 in this Dockerfile; override with --build-arg VLLM_CPU_AMXBF16=...
ARG VLLM_CPU_AMXBF16=1
ENV VLLM_CPU_AMXBF16=${VLLM_CPU_AMXBF16}

WORKDIR /vllm-workspace

# Copy build requirements
COPY requirements/cpu-build.txt requirements/build.txt

RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install -r requirements/build.txt

# Copy the whole build context only after the build requirements are installed,
# so source changes do not invalidate the dependency layer above.
COPY . .

RUN if [ "$GIT_REPO_CHECK" != 0 ]; then bash tools/check_repo.sh ; fi

# Build the wheel; the uv, ccache and .deps directories are cache mounts and
# are therefore not stored in the image layer.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=cache,target=/vllm-workspace/.deps,sharing=locked \
    VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38

######################### TEST DEPS #########################
# Resolves and installs the CPU test requirements on top of the base image.
FROM base AS vllm-test-deps

WORKDIR /vllm-workspace

# Copy test requirements
COPY requirements/test.in requirements/cpu-test.in

# TODO: Update to 2.9.0 when there is a new build for intel_extension_for_pytorch for that version
# NOTE(review): the torch pin below is already 2.10.0, so the TODO above looks
# stale - confirm and remove.
#
# Adapt the requirement list for CPU before compiling it:
#   - drop mamba_ssm everywhere, and decord/terratorch when building on aarch64;
#   - pin torch and strip any version specifier from torchaudio/torchvision.
# Every step is chained with `&&` so that any failing edit aborts the build.
RUN sed -i '/mamba_ssm/d' requirements/cpu-test.in \
    && case "$(uname -m)" in \
        aarch64|arm64) \
            sed -i -e '/decord/d' -e '/terratorch/d' requirements/cpu-test.in \
            ;; \
    esac \
    && sed -i \
        -e 's/^torch==.*/torch==2.10.0/g' \
        -e 's/torchaudio.*/torchaudio/g' \
        -e 's/torchvision.*/torchvision/g' \
        requirements/cpu-test.in \
    && uv pip compile requirements/cpu-test.in -o requirements/cpu-test.txt --index-strategy unsafe-best-match --torch-backend cpu

RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install -r requirements/cpu-test.txt

######################### DEV IMAGE #########################
# Development image: starts from the build stage (sources and build
# requirements already present) and adds editor/debug tooling, an in-place
# install of vLLM and the dev requirements.
FROM vllm-build AS vllm-dev

WORKDIR /vllm-workspace

# Refresh the package lists in the same step as the install: /var/lib/apt is a
# cache mount, so the lists fetched in an earlier stage are not guaranteed to
# be present (or current) when this step runs.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    apt-get update -y \
    && apt-get install -y --no-install-recommends vim numactl xz-utils make clangd-14

# Expose clangd-14 under the unversioned name `clangd`.
RUN ln -s /usr/bin/clangd-14 /usr/bin/clangd

# install development dependencies (for testing)
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install -e tests/vllm_test_utils

# In-place development install; .git is bind-mounted from the build context
# for this step only.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=bind,source=.git,target=.git \
    VLLM_TARGET_DEVICE=cpu python3 setup.py develop

# Reuse the CPU-specific test requirements compiled in the vllm-test-deps stage.
COPY --from=vllm-test-deps /vllm-workspace/requirements/cpu-test.txt requirements/test.txt

RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install -r requirements/dev.txt && \
    pre-commit install --hook-type pre-commit --hook-type commit-msg

ENTRYPOINT ["bash"]

######################### TEST IMAGE #########################
# CI test image: test dependencies plus the wheel built in vllm-build and the
# test, example and benchmark sources.
FROM vllm-test-deps AS vllm-test

WORKDIR /vllm-workspace

# Install the wheel directly from the build stage's dist directory via a bind
# mount, so the wheel file itself is not stored in this image.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,from=vllm-build,src=/vllm-workspace/dist,target=dist \
    uv pip install dist/*.whl

# Plain local files and directories: COPY is used instead of ADD because none
# of ADD's extra behaviours (archive extraction, URL fetch) is wanted here.
COPY ./tests/ ./tests/
COPY ./examples/ ./examples/
COPY ./benchmarks/ ./benchmarks/
COPY ./vllm/collect_env.py .
COPY ./.buildkite/ ./.buildkite/

# install development dependencies (for testing)
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install -e tests/vllm_test_utils

######################### RELEASE IMAGE #########################
# Serving image: the base image plus the wheel built in vllm-build.
FROM base AS vllm-openai

WORKDIR /vllm-workspace

# Install the wheel from the build stage's dist directory via a bind mount, so
# the wheel file itself is not stored in this image.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=bind,from=vllm-build,src=/vllm-workspace/dist,target=dist \
    uv pip install dist/*.whl

# Add labels to document build configuration
LABEL org.opencontainers.image.title="vLLM CPU"
LABEL org.opencontainers.image.description="vLLM inference engine for CPU platforms"
LABEL org.opencontainers.image.vendor="vLLM Project"
LABEL org.opencontainers.image.source="https://github.com/vllm-project/vllm"

# Build configuration labels
# These ARGs are redeclared without defaults, so each `:-` fallback below is
# what gets recorded when the value was not passed with --build-arg. The
# fallbacks therefore have to match the defaults used by the vllm-build stage;
# note that VLLM_CPU_AMXBF16 is enabled by default there.
ARG TARGETARCH
ARG VLLM_CPU_DISABLE_AVX512
ARG VLLM_CPU_AVX2
ARG VLLM_CPU_AVX512
ARG VLLM_CPU_AVX512BF16
ARG VLLM_CPU_AVX512VNNI
ARG VLLM_CPU_AMXBF16
ARG PYTHON_VERSION

LABEL ai.vllm.build.target-arch="${TARGETARCH}"
LABEL ai.vllm.build.cpu-disable-avx512="${VLLM_CPU_DISABLE_AVX512:-false}"
LABEL ai.vllm.build.cpu-avx2="${VLLM_CPU_AVX2:-false}"
LABEL ai.vllm.build.cpu-avx512="${VLLM_CPU_AVX512:-false}"
LABEL ai.vllm.build.cpu-avx512bf16="${VLLM_CPU_AVX512BF16:-false}"
LABEL ai.vllm.build.cpu-avx512vnni="${VLLM_CPU_AVX512VNNI:-false}"
LABEL ai.vllm.build.cpu-amxbf16="${VLLM_CPU_AMXBF16:-true}"
LABEL ai.vllm.build.python-version="${PYTHON_VERSION:-3.12}"

ENTRYPOINT ["vllm", "serve"]