Dockerfile.cpu 2.88 KB
Newer Older
1
2
# This vLLM Dockerfile is used to construct image that can build and run vLLM on x86 CPU platform.

3
FROM ubuntu:22.04 AS cpu-test-1
4

5
6
7
8
ENV CCACHE_DIR=/root/.cache/ccache

ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache

9
10
11
RUN --mount=type=cache,target=/var/cache/apt \
    apt-get update -y \
    && apt-get install -y curl ccache git wget vim numactl gcc-12 g++-12 python3 python3-pip libtcmalloc-minimal4 libnuma-dev \
12
    && apt-get install -y ffmpeg libsm6 libxext6 libgl1 \
13
14
    && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12

15
16
17
# https://intel.github.io/intel-extension-for-pytorch/cpu/latest/tutorials/performance_tuning/tuning_guide.html
# intel-openmp provides additional performance improvement vs. openmp
# tcmalloc provides better memory allocation efficiency, e.g, holding memory in caches to speed up access of commonly-used objects.
18
19
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install intel-openmp
20

21
ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/usr/local/lib/libiomp5.so"
22

23
RUN echo 'ulimit -c 0' >> ~/.bashrc
24

25
RUN pip install intel_extension_for_pytorch==2.4.0
26

27
28
WORKDIR /workspace

29
30
ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
31
32
33
34
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,src=requirements-build.txt,target=requirements-build.txt \
    pip install --upgrade pip && \
    pip install -r requirements-build.txt
35

36
37
38
39
40
41
42
43
44
45
46
47
48
# install oneDNN
RUN git clone -b rls-v3.5 https://github.com/oneapi-src/oneDNN.git

RUN --mount=type=cache,target=/root/.cache/ccache \
    cmake -B ./oneDNN/build -S ./oneDNN -G Ninja -DONEDNN_LIBRARY_TYPE=STATIC \ 
    -DONEDNN_BUILD_DOC=OFF \ 
    -DONEDNN_BUILD_EXAMPLES=OFF \ 
    -DONEDNN_BUILD_TESTS=OFF \ 
    -DONEDNN_BUILD_GRAPH=OFF \ 
    -DONEDNN_ENABLE_WORKLOAD=INFERENCE \ 
    -DONEDNN_ENABLE_PRIMITIVE=MATMUL && \
    cmake --build ./oneDNN/build --target install --config Release

49
50
FROM cpu-test-1 AS build

51
52
WORKDIR /workspace/vllm

53
54
55
56
57
58
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,src=requirements-common.txt,target=requirements-common.txt \
    --mount=type=bind,src=requirements-cpu.txt,target=requirements-cpu.txt \
    pip install -v -r requirements-cpu.txt

COPY ./ ./
59

60
61
62
63
# Support for building with non-AVX512 vLLM: docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" ...
ARG VLLM_CPU_DISABLE_AVX512
ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512}

64
65
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=cache,target=/root/.cache/ccache \
66
    --mount=type=bind,source=.git,target=.git \
67
    VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel && \
68
69
    pip install dist/*.whl && \
    rm -rf dist
70

71
72
WORKDIR /workspace/

73
RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
74

75
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]