# This vLLM Dockerfile is used to build images that can run vLLM on both x86_64 and arm64 CPU platforms.
#
# Supported platforms:
#   - linux/amd64 (x86_64)
#   - linux/arm64 (aarch64)
#
# Use the `--platform` option with `docker buildx build` to specify the target architecture, e.g.:
#   docker buildx build --platform=linux/arm64 -f docker/Dockerfile.cpu .
#
# Build targets:
#   vllm-openai (default): used for serving deployment
#   vllm-test: used for CI tests
#   vllm-dev: used for development
#
# Build arguments:
#   PYTHON_VERSION=3.13|3.12 (default)|3.11|3.10
#   VLLM_CPU_DISABLE_AVX512=false (default)|true
#   VLLM_CPU_AVX2=false (default)|true (for cross-compilation)
#   VLLM_CPU_AVX512=false (default)|true (for cross-compilation)
#   VLLM_CPU_AVX512BF16=false (default)|true (for cross-compilation)
#   VLLM_CPU_AVX512VNNI=false (default)|true (for cross-compilation)
#   VLLM_CPU_AMXBF16=true (default)|false (for cross-compilation)
#

######################### COMMON BASE IMAGE #########################
# Architecture-independent base: OS packages, the uv tool, a Python virtualenv
# and the CPU runtime requirements. All other stages derive from this one.
FROM ubuntu:22.04 AS base-common

WORKDIR /workspace

ARG PYTHON_VERSION=3.12
ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"

# Install minimal dependencies and uv.
# The uv installer is downloaded to a file and then executed instead of being
# piped into `sh`: without `pipefail`, a failed download in `curl | sh` would be
# hidden by `sh` exiting 0 on empty input, and the error would only surface in a
# later step that calls `uv`.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    apt-get update -y \
    && apt-get install -y --no-install-recommends sudo ccache git curl wget ca-certificates \
    gcc-12 g++-12 libtcmalloc-minimal4 libnuma-dev ffmpeg libsm6 libxext6 libgl1 jq lsof \
    && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12 \
    && curl -LsSf https://astral.sh/uv/install.sh -o /tmp/uv-install.sh \
    && sh /tmp/uv-install.sh \
    && rm -f /tmp/uv-install.sh

# Use gcc-12/g++-12 for native builds and route C++ compilation through ccache.
ENV CC=/usr/bin/gcc-12 CXX=/usr/bin/g++-12
ENV CCACHE_DIR=/root/.cache/ccache
ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache

# /root/.local/bin is added to PATH so that `uv` resolves in the steps below.
ENV PATH="/root/.local/bin:$PATH"
ENV VIRTUAL_ENV="/opt/venv"
ENV UV_PYTHON_INSTALL_DIR=/opt/uv/python
RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV}
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

ENV UV_HTTP_TIMEOUT=500

# Install Python dependencies
ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
ENV UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
ENV UV_INDEX_STRATEGY="unsafe-best-match"
ENV UV_LINK_MODE="copy"

# Copy only the requirements files first so this layer stays cached until they change.
COPY requirements/common.txt requirements/common.txt
COPY requirements/cpu.txt requirements/cpu.txt

RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --upgrade pip && \
    uv pip install -r requirements/cpu.txt

# Persist the BuildKit-provided target architecture into the image environment.
ARG TARGETARCH
ENV TARGETARCH=${TARGETARCH}

######################### x86_64 BASE IMAGE #########################
# amd64 variant of the common base.
FROM base-common AS base-amd64

# Preload tcmalloc (from the multiarch lib dir) and libiomp5.so from the virtualenv.
ENV LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/opt/venv/lib/libiomp5.so

######################### arm64 BASE IMAGE #########################
# arm64 variant of the common base.
FROM base-common AS base-arm64

# Preload tcmalloc from the aarch64 multiarch lib dir.
ENV LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libtcmalloc_minimal.so.4

######################### BASE IMAGE #########################
# Selects base-amd64 or base-arm64 according to the target architecture.
FROM base-${TARGETARCH} AS base

# Append `ulimit -c 0` to root's .bashrc (sets the core-file size limit to 0 in bash sessions).
RUN printf '%s\n' 'ulimit -c 0' >> ~/.bashrc

######################### BUILD IMAGE #########################
# Builds the vLLM CPU wheel into /vllm-workspace/dist.
FROM base AS vllm-build

# Build-time knobs (override with `docker build --build-arg NAME=VALUE ...`):
#   max_jobs                 exported as MAX_JOBS
#   GIT_REPO_CHECK           when not 0, tools/check_repo.sh is run after the source copy
#   VLLM_CPU_DISABLE_AVX512  build without AVX512
#   VLLM_CPU_AVX2            cross-compilation with the AVX2 ISA
#   VLLM_CPU_AVX512          cross-compilation with the AVX512 ISA
#   VLLM_CPU_AVX512BF16      build with the AVX512BF16 ISA
#   VLLM_CPU_AVX512VNNI      build with the AVX512VNNI ISA
#   VLLM_CPU_AMXBF16         build with the AMXBF16 ISA (defaults to 1)
ARG max_jobs=32
ARG GIT_REPO_CHECK=0
ARG VLLM_CPU_DISABLE_AVX512=0
ARG VLLM_CPU_AVX2=0
ARG VLLM_CPU_AVX512=0
ARG VLLM_CPU_AVX512BF16=0
ARG VLLM_CPU_AVX512VNNI=0
ARG VLLM_CPU_AMXBF16=1

# Export the knobs so the build steps below see them as environment variables.
ENV MAX_JOBS=${max_jobs} \
    VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512} \
    VLLM_CPU_AVX2=${VLLM_CPU_AVX2} \
    VLLM_CPU_AVX512=${VLLM_CPU_AVX512} \
    VLLM_CPU_AVX512BF16=${VLLM_CPU_AVX512BF16} \
    VLLM_CPU_AVX512VNNI=${VLLM_CPU_AVX512VNNI} \
    VLLM_CPU_AMXBF16=${VLLM_CPU_AMXBF16}

WORKDIR /vllm-workspace

# Build requirements are copied and installed before the full source tree so
# this layer is reused when only source files change.
COPY requirements/cpu-build.txt requirements/build.txt

RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install -r requirements/build.txt

COPY . .

# Optional repository check, skipped when GIT_REPO_CHECK is 0.
RUN [ "$GIT_REPO_CHECK" = 0 ] || bash tools/check_repo.sh

# Build the wheel; the uv, ccache and .deps directories are cache mounts.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=cache,target=/vllm-workspace/.deps,sharing=locked \
    VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38

######################### TEST DEPS #########################
# Resolves and installs the CPU test requirements on top of the base image.
FROM base AS vllm-test-deps

WORKDIR /vllm-workspace

# Start from the generic test requirements; they are adapted for CPU below.
COPY requirements/test.in requirements/cpu-test.in

# 1. drop mamba_ssm
# 2. on aarch64/arm64 hosts, also drop decord and terratorch
# 3. pin torch to 2.10.0 and strip everything after the torchaudio / torchvision names
# 4. compile the adapted input into requirements/cpu-test.txt with the CPU torch backend
RUN sed -i '/mamba_ssm/d' requirements/cpu-test.in && \
    case "$(uname -m)" in \
        aarch64|arm64) \
            sed -i '/decord/d' requirements/cpu-test.in; \
            sed -i '/terratorch/d' requirements/cpu-test.in \
            ;; \
    esac && \
    sed -i \
        -e 's/^torch==.*/torch==2.10.0/g' \
        -e 's/torchaudio.*/torchaudio/g' \
        -e 's/torchvision.*/torchvision/g' \
        requirements/cpu-test.in && \
    uv pip compile requirements/cpu-test.in -o requirements/cpu-test.txt --index-strategy unsafe-best-match --torch-backend cpu

RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install -r requirements/cpu-test.txt

######################### DEV IMAGE #########################
# Development image: extends the build stage (source tree + build requirements)
# with editor/debug tooling and an in-place install of vLLM.
FROM vllm-build AS vllm-dev

WORKDIR /vllm-workspace

# The apt package lists live in the /var/lib/apt cache mount, not in an image
# layer, so they may be empty or stale when this step runs (for example when
# earlier layers come from a remote cache). Refresh them in the same RUN as the
# install rather than relying on an earlier stage's `apt-get update`.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    apt-get update -y \
    && apt-get install -y --no-install-recommends vim numactl xz-utils make clangd-14

# Expose the versioned clangd binary under its unversioned name.
RUN ln -s /usr/bin/clangd-14 /usr/bin/clangd

# install development dependencies (for testing)
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install -e tests/vllm_test_utils

# Install vLLM in development mode; .git is bind-mounted from the build context for this step only.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=bind,source=.git,target=.git \
    VLLM_TARGET_DEVICE=cpu python3 setup.py develop

# Replace requirements/test.txt with the CPU-resolved list from the test-deps stage
# (presumably referenced by requirements/dev.txt — confirm against that file).
COPY --from=vllm-test-deps /vllm-workspace/requirements/cpu-test.txt requirements/test.txt

RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install -r requirements/dev.txt && \
    pre-commit install --hook-type pre-commit --hook-type commit-msg

ENTRYPOINT ["bash"]

######################### TEST IMAGE #########################
# CI test image: test dependencies + the built wheel + tests, examples and benchmarks.
FROM vllm-test-deps AS vllm-test

WORKDIR /vllm-workspace

# Install the wheel produced by the vllm-build stage; dist/ is bind-mounted
# for this step only, so the wheel file itself is not copied into a layer.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,from=vllm-build,src=/vllm-workspace/dist,target=dist \
    uv pip install dist/*.whl

# COPY rather than ADD: these are plain local files and directories, so ADD's
# archive-extraction and URL-fetching behaviour is neither needed nor wanted.
COPY ./tests/ ./tests/
COPY ./examples/ ./examples/
COPY ./benchmarks/ ./benchmarks/
COPY ./vllm/collect_env.py .
COPY ./.buildkite/ ./.buildkite/

# install development dependencies (for testing)
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install -e tests/vllm_test_utils

######################### RELEASE IMAGE #########################
# Serving image: the base image plus the wheel built in the vllm-build stage.
FROM base AS vllm-openai

WORKDIR /vllm-workspace

# dist/ is bind-mounted from the build stage for this step only.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=bind,from=vllm-build,src=/vllm-workspace/dist,target=dist \
    uv pip install dist/*.whl

# Add labels to document build configuration
LABEL org.opencontainers.image.title="vLLM CPU"
LABEL org.opencontainers.image.description="vLLM inference engine for CPU platforms"
LABEL org.opencontainers.image.vendor="vLLM Project"
LABEL org.opencontainers.image.source="https://github.com/vllm-project/vllm"

# Build configuration labels.
# ARG values are scoped per stage: the defaults declared in the vllm-build stage
# are not visible here, so each ARG is empty unless passed with --build-arg and
# the `:-` fallbacks below must mirror the build-stage defaults.
ARG TARGETARCH
ARG VLLM_CPU_DISABLE_AVX512
ARG VLLM_CPU_AVX2
ARG VLLM_CPU_AVX512
ARG VLLM_CPU_AVX512BF16
ARG VLLM_CPU_AVX512VNNI
ARG VLLM_CPU_AMXBF16
ARG PYTHON_VERSION

LABEL ai.vllm.build.target-arch="${TARGETARCH}"
LABEL ai.vllm.build.cpu-disable-avx512="${VLLM_CPU_DISABLE_AVX512:-false}"
LABEL ai.vllm.build.cpu-avx2="${VLLM_CPU_AVX2:-false}"
LABEL ai.vllm.build.cpu-avx512="${VLLM_CPU_AVX512:-false}"
LABEL ai.vllm.build.cpu-avx512bf16="${VLLM_CPU_AVX512BF16:-false}"
LABEL ai.vllm.build.cpu-avx512vnni="${VLLM_CPU_AVX512VNNI:-false}"
# The build stage enables AMXBF16 by default (ARG VLLM_CPU_AMXBF16=1), so the
# fallback is "true"; "false" would misreport a default build.
LABEL ai.vllm.build.cpu-amxbf16="${VLLM_CPU_AMXBF16:-true}"
LABEL ai.vllm.build.python-version="${PYTHON_VERSION:-3.12}"

ENTRYPOINT ["vllm", "serve"]