Dockerfile.cpu 9.39 KB
Newer Older
1
2
3
4
5
6
7
8
# This vLLM Dockerfile is used to build images that can run vLLM on both x86_64 and arm64 CPU platforms.
#
# Supported platforms:
#   - linux/amd64 (x86_64)
#   - linux/arm64 (aarch64)
#
# Use the `--platform` option with `docker buildx build` to specify the target architecture, e.g.:
#   docker buildx build --platform=linux/arm64 -f docker/Dockerfile.cpu .
9
10
11
12
13
14
15
#
# Build targets:
#   vllm-openai (default): used for serving deployment
#   vllm-test: used for CI tests
#   vllm-dev: used for development
#
# Build arguments:
16
#   PYTHON_VERSION=3.13|3.12 (default)|3.11|3.10
17
#   VLLM_CPU_DISABLE_AVX512=false (default)|true
18
19
20
21
22
#   VLLM_CPU_AVX2=false (default)|true (for cross-compilation)
#   VLLM_CPU_AVX512=false (default)|true (for cross-compilation)
#   VLLM_CPU_AVX512BF16=false (default)|true (for cross-compilation)
#   VLLM_CPU_AVX512VNNI=false (default)|true (for cross-compilation)
#   VLLM_CPU_AMXBF16=true (default)|false (for cross-compilation)
23
#   VLLM_CPU_ARM_BF16=false (default)|true (for cross-compilation)
24
25
#

26
27
######################### COMMON BASE IMAGE #########################
FROM ubuntu:22.04 AS base-common
28

29
WORKDIR /workspace
30

31
32
# PYTHON_VERSION selects the interpreter passed to `uv venv --python` below.
# PIP_EXTRA_INDEX_URL is later exported as both PIP_EXTRA_INDEX_URL and
# UV_EXTRA_INDEX_URL; its default is the PyTorch CPU wheel index URL.
ARG PYTHON_VERSION=3.12
ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
33

34
35
36
37
# Install the minimal OS packages (GCC 12 toolchain, ccache, tcmalloc, NUMA and
# media/GL runtime libraries, CLI helpers) and then the uv installer.
# apt's package cache and lists are kept in BuildKit cache mounts.
# gcc/g++ are registered through update-alternatives so the unversioned names
# resolve to GCC 12.
# The uv install script is downloaded to a file and then executed instead of
# being piped into sh: with a pipe, the default shell (no pipefail) reports
# only sh's exit status, so a failed download would go unnoticed in this step.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    apt-get update -y \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        ccache \
        curl \
        ffmpeg \
        g++-12 \
        gcc-12 \
        git \
        jq \
        libgl1 \
        libnuma-dev \
        libsm6 \
        libtcmalloc-minimal4 \
        libxext6 \
        lsof \
        sudo \
        wget \
    && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12 \
    && curl -LsSf https://astral.sh/uv/install.sh -o /tmp/uv-install.sh \
    && sh /tmp/uv-install.sh \
    && rm -f /tmp/uv-install.sh

43
# Toolchain environment: compile C/C++ with GCC 12, launch C++ compilations
# through ccache, and keep the ccache store under /root/.cache/ccache.
ENV CC=/usr/bin/gcc-12 \
    CXX=/usr/bin/g++-12 \
    CCACHE_DIR=/root/.cache/ccache \
    CMAKE_CXX_COMPILER_LAUNCHER=ccache

47
48
# Make /root/.local/bin resolvable (presumably where the uv installer places
# its binaries - verify), and fix the locations of the virtualenv and of
# uv-managed Python installations.
ENV PATH="/root/.local/bin:$PATH" \
    VIRTUAL_ENV="/opt/venv" \
    UV_PYTHON_INSTALL_DIR=/opt/uv/python

# Create the virtualenv with the requested interpreter version.
RUN uv venv --seed --python "${PYTHON_VERSION}" "${VIRTUAL_ENV}"

# From here on, executables from the virtualenv take precedence on PATH.
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
52

53
# Settings for the Python dependency installs that follow:
#   - a 500 HTTP timeout value for uv
#   - the extra package index (from the PIP_EXTRA_INDEX_URL build argument),
#     exported under both the pip and the uv variable names
#   - uv index strategy "unsafe-best-match" and link mode "copy"
ENV UV_HTTP_TIMEOUT=500 \
    PIP_EXTRA_INDEX_URL="${PIP_EXTRA_INDEX_URL}" \
    UV_EXTRA_INDEX_URL="${PIP_EXTRA_INDEX_URL}" \
    UV_INDEX_STRATEGY="unsafe-best-match" \
    UV_LINK_MODE="copy"
60
61
62
63
64

# Stage only the requirement manifests needed for the runtime install, so this
# step does not depend on the rest of the build context.
COPY requirements/common.txt requirements/cpu.txt requirements/

# Upgrade pip inside the virtualenv, then install the CPU requirements.
# uv's download cache is kept in a BuildKit cache mount.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --upgrade pip \
    && uv pip install --requirement requirements/cpu.txt
68

69
70
71
# Bring the TARGETARCH build argument into this stage and persist its value as
# an environment variable, so RUN steps in stages derived from this one (for
# example the ISA-flag validation in the build stage) can read $TARGETARCH.
ARG TARGETARCH
ENV TARGETARCH=${TARGETARCH}

72
73
74
75
######################### x86_64 BASE IMAGE #########################
FROM base-common AS base-amd64

# Preload tcmalloc from the x86_64 multiarch library directory together with
# libiomp5.so from the virtualenv's lib directory.
# NOTE(review): libiomp5.so is presumably placed under /opt/venv/lib by one of
# the installed Python requirements - confirm it exists in the final image.
ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/opt/venv/lib/libiomp5.so"
76

77
78
######################### arm64 BASE IMAGE #########################
FROM base-common AS base-arm64
79

80
81
82
83
# Preload tcmalloc from the aarch64 multiarch library directory. Unlike the
# x86_64 base stage, no libiomp5.so entry is added here.
ENV LD_PRELOAD="/usr/lib/aarch64-linux-gnu/libtcmalloc_minimal.so.4"

######################### BASE IMAGE #########################
FROM base-${TARGETARCH} AS base
84

85
# Append `ulimit -c 0` to the build user's ~/.bashrc, so shells that source it
# set the core-file size limit to zero.
RUN echo 'ulimit -c 0' >> ~/.bashrc
86

87
88
######################### BUILD IMAGE #########################
FROM base AS vllm-build
89

90
# Build-time knobs for the wheel build. Every ISA switch is declared as an ARG
# and mirrored into an environment variable of the same name; max_jobs is
# exported as MAX_JOBS.
#
#   max_jobs                  exported as MAX_JOBS (default 32)
#   GIT_REPO_CHECK            non-zero runs tools/check_repo.sh after the source copy
#   VLLM_CPU_DISABLE_AVX512   build without AVX512:
#                             docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" ...
#   VLLM_CPU_AVX2             cross-compile with the AVX2 ISA:
#                             docker build --build-arg VLLM_CPU_AVX2="1" ...
#   VLLM_CPU_AVX512           cross-compile with the AVX512 ISA:
#                             docker build --build-arg VLLM_CPU_AVX512="1" ...
#   VLLM_CPU_AVX512BF16       build with the AVX512BF16 ISA:
#                             docker build --build-arg VLLM_CPU_AVX512BF16="true" ...
#   VLLM_CPU_AVX512VNNI       build with the AVX512VNNI ISA:
#                             docker build --build-arg VLLM_CPU_AVX512VNNI="true" ...
#   VLLM_CPU_AMXBF16          build with the AMXBF16 ISA; this one defaults to 1
#   VLLM_CPU_ARM_BF16         cross-compile with the ARM BF16 ISA:
#                             docker build --build-arg VLLM_CPU_ARM_BF16="true" ...
ARG max_jobs=32
ARG GIT_REPO_CHECK=0
ARG VLLM_CPU_DISABLE_AVX512=0
ARG VLLM_CPU_AVX2=0
ARG VLLM_CPU_AVX512=0
ARG VLLM_CPU_AVX512BF16=0
ARG VLLM_CPU_AVX512VNNI=0
ARG VLLM_CPU_AMXBF16=1
ARG VLLM_CPU_ARM_BF16=0

ENV MAX_JOBS="${max_jobs}" \
    VLLM_CPU_DISABLE_AVX512="${VLLM_CPU_DISABLE_AVX512}" \
    VLLM_CPU_AVX2="${VLLM_CPU_AVX2}" \
    VLLM_CPU_AVX512="${VLLM_CPU_AVX512}" \
    VLLM_CPU_AVX512BF16="${VLLM_CPU_AVX512BF16}" \
    VLLM_CPU_AVX512VNNI="${VLLM_CPU_AVX512VNNI}" \
    VLLM_CPU_AMXBF16="${VLLM_CPU_AMXBF16}" \
    VLLM_CPU_ARM_BF16="${VLLM_CPU_ARM_BF16}"
115

116
WORKDIR /vllm-workspace
117

118
119
120
121
122
123
124
125
126
127
# Validate build arguments - prevent mixing incompatible ISA flags.
# x86-only switches must be off when targeting arm64, and the ARM-only switch
# must be off when targeting amd64; otherwise the build stops with an error.
# A switch counts as "off" when it is empty, 0, or false. The header of this
# file documents false/true while the ARG defaults are 0, so both spellings of
# "disabled" have to pass this check.
# NOTE(review): assumes the build itself also treats "false" as disabled - confirm.
RUN flag_on() { \
        case "$1" in \
            ""|0|false|False|FALSE) return 1 ;; \
            *) return 0 ;; \
        esac; \
    }; \
    if [ "$TARGETARCH" = "arm64" ] && { flag_on "$VLLM_CPU_AVX2" || flag_on "$VLLM_CPU_AVX512" || flag_on "$VLLM_CPU_AVX512BF16" || flag_on "$VLLM_CPU_AVX512VNNI"; }; then \
        echo "ERROR: Cannot use x86-specific ISA flags (AVX2, AVX512, etc.) when building for ARM64 (--platform=linux/arm64)"; \
        exit 1; \
    fi && \
    if [ "$TARGETARCH" = "amd64" ] && flag_on "$VLLM_CPU_ARM_BF16"; then \
        echo "ERROR: Cannot use ARM-specific ISA flags (ARM_BF16) when building for x86_64 (--platform=linux/amd64)"; \
        exit 1; \
    fi

128
129
130
# Build-only Python requirements: the CPU build manifest is staged under the
# generic name requirements/build.txt and installed into the virtualenv.
COPY requirements/cpu-build.txt requirements/build.txt

RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --requirement requirements/build.txt
133

134
# Bring the whole build context into the working directory.
COPY . .

# Optional repository check: skipped when GIT_REPO_CHECK is 0 (the default),
# otherwise tools/check_repo.sh decides whether the build continues.
RUN test "$GIT_REPO_CHECK" = 0 || bash tools/check_repo.sh

# Build the CPU wheel into dist/. The ccache store, uv's cache and the .deps
# directory are BuildKit cache mounts, so they persist between builds.
RUN --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=cache,target=/root/.cache/uv \
    --mount=type=cache,target=/vllm-workspace/.deps,sharing=locked \
    env VLLM_TARGET_DEVICE=cpu \
        python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38
142

143
144
145
######################### TEST DEPS #########################
FROM base AS vllm-test-deps

146
WORKDIR /vllm-workspace
147

148
149
150
151
# Start the CPU test manifest as a copy of the generic test requirements.
COPY requirements/test.in requirements/cpu-test.in

# Edit the manifest in place, then compile it into requirements/cpu-test.txt:
#   1. delete every line mentioning mamba_ssm
#   2. when `uname -m` reports aarch64/arm64, also delete the lines mentioning
#      decord and terratorch
#   3. pin torch to 2.10.0 and cut each torchaudio / torchvision entry down to
#      the bare package name
RUN manifest=requirements/cpu-test.in; \
    sed -i '/mamba_ssm/d' "$manifest" && \
    case "$(uname -m)" in \
        aarch64|arm64) \
            sed -i -e '/decord/d' -e '/terratorch/d' "$manifest" \
            ;; \
    esac && \
    sed -i \
        -e 's/^torch==.*/torch==2.10.0/g' \
        -e 's/torchaudio.*/torchaudio/g' \
        -e 's/torchvision.*/torchvision/g' \
        "$manifest" && \
    uv pip compile "$manifest" -o requirements/cpu-test.txt --index-strategy unsafe-best-match --torch-backend cpu

# Install the compiled test requirements into the virtualenv.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --requirement requirements/cpu-test.txt
169

170
171
172
######################### DEV IMAGE #########################
FROM vllm-build AS vllm-dev

173
WORKDIR /vllm-workspace
174
175
176

# Extra tooling for the development image.
# The package lists live in a BuildKit cache mount, which may be empty or stale
# on a given builder, so they are refreshed in the same step as the install
# instead of relying on what an earlier stage happened to leave in the cache.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    apt-get update -y \
    && apt-get install -y --no-install-recommends \
        clangd-14 \
        make \
        numactl \
        vim \
        xz-utils

# Expose clangd-14 under the unversioned name; -f keeps the step repeatable if
# the link already exists.
RUN ln -sf /usr/bin/clangd-14 /usr/bin/clangd
180
181
182

# Editable install of the in-tree test helper package.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --editable tests/vllm_test_utils

# Development build of vLLM for the CPU target. The build context's .git
# directory is bind-mounted for the duration of this step.
RUN --mount=type=bind,source=.git,target=.git \
    --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=cache,target=/root/.cache/uv \
    env VLLM_TARGET_DEVICE=cpu python3 setup.py develop

# Take the compiled CPU test requirements from the test-deps stage and store
# them as requirements/test.txt before the dev requirements are installed.
COPY --from=vllm-test-deps /vllm-workspace/requirements/cpu-test.txt requirements/test.txt

# Install the dev requirements, then register the pre-commit and commit-msg
# git hooks.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --requirement requirements/dev.txt \
    && pre-commit install -t pre-commit -t commit-msg

# The development image starts an interactive shell by default.
ENTRYPOINT ["bash"]

######################### TEST IMAGE #########################
199
FROM vllm-test-deps AS vllm-test
200

201
WORKDIR /vllm-workspace
202

203
# Install the wheel built in the vllm-build stage. Its dist/ directory is
# bind-mounted for this step only, so the wheel file itself is not copied into
# a layer of this image.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,from=vllm-build,src=/vllm-workspace/dist,target=dist \
    uv pip install dist/*.whl

# Test inputs from the build context. COPY is used instead of ADD because these
# are plain local files and directories; none of ADD's extra behaviours
# (archive extraction, remote URLs) is wanted here.
COPY ./tests/ ./tests/
COPY ./examples/ ./examples/
COPY ./benchmarks/ ./benchmarks/
COPY ./vllm/collect_env.py .
COPY ./.buildkite/ ./.buildkite/

# install development dependencies (for testing)
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install -e tests/vllm_test_utils
216
217
218
219

######################### RELEASE IMAGE #########################
FROM base AS vllm-openai

220
WORKDIR /vllm-workspace
221
222
223

# Install the wheel built in the vllm-build stage. Its dist/ directory is
# bind-mounted for this step only, so the wheel file itself is not copied into
# a layer of this image.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=bind,from=vllm-build,src=/vllm-workspace/dist,target=dist \
    uv pip install dist/*.whl

# Add labels to document build configuration
LABEL org.opencontainers.image.title="vLLM CPU" \
      org.opencontainers.image.description="vLLM inference engine for CPU platforms" \
      org.opencontainers.image.vendor="vLLM Project" \
      org.opencontainers.image.source="https://github.com/vllm-project/vllm"

# Build configuration labels.
# The ARGs below are re-declared in this stage without defaults, so each one is
# empty unless it was passed with --build-arg; the `:-` fallback in the
# matching LABEL supplies the value recorded in that case.
# The AMXBF16 fallback is 1 because the build stage declares
# `ARG VLLM_CPU_AMXBF16=1`: a build that does not pass the argument still has
# it enabled, and the label has to say so.
ARG TARGETARCH
ARG VLLM_CPU_DISABLE_AVX512
ARG VLLM_CPU_AVX2
ARG VLLM_CPU_AVX512
ARG VLLM_CPU_AVX512BF16
ARG VLLM_CPU_AVX512VNNI
ARG VLLM_CPU_AMXBF16
ARG VLLM_CPU_ARM_BF16
ARG PYTHON_VERSION

LABEL ai.vllm.build.target-arch="${TARGETARCH}"
LABEL ai.vllm.build.cpu-disable-avx512="${VLLM_CPU_DISABLE_AVX512:-false}"
LABEL ai.vllm.build.cpu-avx2="${VLLM_CPU_AVX2:-false}"
LABEL ai.vllm.build.cpu-avx512="${VLLM_CPU_AVX512:-false}"
LABEL ai.vllm.build.cpu-avx512bf16="${VLLM_CPU_AVX512BF16:-false}"
LABEL ai.vllm.build.cpu-avx512vnni="${VLLM_CPU_AVX512VNNI:-false}"
LABEL ai.vllm.build.cpu-amxbf16="${VLLM_CPU_AMXBF16:-1}"
LABEL ai.vllm.build.cpu-arm-bf16="${VLLM_CPU_ARM_BF16:-false}"
LABEL ai.vllm.build.python-version="${PYTHON_VERSION:-3.12}"

254
ENTRYPOINT ["vllm", "serve"]