# The vLLM Dockerfile is used to construct a vLLM image that can be directly
# used to run the OpenAI compatible server.

# Please update any changes made here to
# docs/source/dev/dockerfile/dockerfile.rst and
# docs/source/assets/dev/dockerfile-stages-dependency.png

# Default CUDA toolkit version. It is declared ahead of the first FROM so that
# it can be substituted into base-image tags.
ARG CUDA_VERSION=12.4.1
#################### BASE BUILD IMAGE ####################
# prepare basic build environment
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04 AS base
# An ARG declared before FROM is only visible to FROM lines, so CUDA_VERSION is
# declared again here for the RUN steps of this stage.
ARG CUDA_VERSION=12.4.1
# Python minor version installed into this stage.
ARG PYTHON_VERSION=3.12
# Stop apt/debconf from prompting while packages are installed.
ENV DEBIAN_FRONTEND=noninteractive

# Install Python and other dependencies.
# - tzdata is preseeded through debconf so its configuration never prompts.
# - python${PYTHON_VERSION} comes from the deadsnakes PPA and is registered as
#   the default `python3` via update-alternatives.
# - get-pip.py is fetched with `curl -f` into a file and then executed, instead
#   of being piped straight into the interpreter: the RUN shell does not set
#   pipefail, so a failed download inside a pipe would not fail at the curl step.
RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
    && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
    && apt-get update -y \
    && apt-get install -y ccache software-properties-common git curl sudo \
    && add-apt-repository ppa:deadsnakes/ppa \
    && apt-get update -y \
    && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv \
    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
    && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \
    && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \
    && curl -fsS -o /tmp/get-pip.py https://bootstrap.pypa.io/get-pip.py \
    && python${PYTHON_VERSION} /tmp/get-pip.py \
    && rm -f /tmp/get-pip.py \
    && python3 --version && python3 -m pip --version

# Upgrade to GCC 10 to avoid https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92519
# as it was causing spam when compiling the CUTLASS kernels.
# `apt-get update` runs in the same layer as the install so that a cached
# earlier layer holding an outdated package index cannot break this step.
# The final `gcc --version` prints the compiler that is now the default.
RUN apt-get update -y \
    && apt-get install -y gcc-10 g++-10 \
    && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 110 --slave /usr/bin/g++ g++ /usr/bin/g++-10 \
    && gcc --version

# Workaround for https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
# this won't be needed for future versions of this docker image
# or future versions of triton.
# `cut -d. -f1,2` keeps the first two dot-separated fields of CUDA_VERSION, so
# the default 12.4.1 selects /usr/local/cuda-12.4/compat/.
RUN ldconfig "/usr/local/cuda-$(echo "$CUDA_VERSION" | cut -d. -f1,2)/compat/"

WORKDIR /workspace

# install build and runtime dependencies
# Both requirement files are copied, but pip is only pointed at
# requirements-cuda.txt (presumably it pulls in requirements-common.txt via an
# `-r` include -- verify against that file).
COPY requirements-common.txt requirements-cuda.txt ./
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install -r requirements-cuda.txt

# cuda arch list used by torch
# can be useful for both `dev` and `test`
# explicitly set the list to avoid issues with torch 2.2
# see https://github.com/pytorch/pytorch/pull/123243
ARG torch_cuda_arch_list='7.0 7.5 8.0 8.6 8.9 9.0+PTX'
ENV TORCH_CUDA_ARCH_LIST="${torch_cuda_arch_list}"
# Override the arch list for flash-attn to reduce the binary size
ARG vllm_fa_cmake_gpu_arches='80-real;90-real'
ENV VLLM_FA_CMAKE_GPU_ARCHES="${vllm_fa_cmake_gpu_arches}"
#################### BASE BUILD IMAGE ####################

#################### WHEEL BUILD IMAGE ####################
FROM base AS build

# install build dependencies
# The destination is relative to the WORKDIR inherited from `base`.
COPY requirements-build.txt ./
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install -r requirements-build.txt

# files and directories related to build wheels
# Directories are copied onto a directory of the same name; single files land
# in the current WORKDIR under their own name.
COPY csrc/ csrc/
COPY setup.py ./
COPY cmake/ cmake/
COPY CMakeLists.txt ./
COPY requirements-common.txt requirements-cuda.txt ./
COPY pyproject.toml ./
COPY vllm/ vllm/

# max jobs used by Ninja to build extensions
ARG max_jobs=2
ENV MAX_JOBS="${max_jobs}"
# number of threads used by nvcc
ARG nvcc_threads=8
ENV NVCC_THREADS="${nvcc_threads}"

# sccache settings; USE_SCCACHE has no default and is compared against "1" by
# the build steps that follow.
ARG USE_SCCACHE
ARG SCCACHE_BUCKET_NAME=vllm-build-sccache
ARG SCCACHE_REGION_NAME=us-west-2
ARG SCCACHE_S3_NO_CREDENTIALS=0

# if USE_SCCACHE is set, use sccache to speed up compilation
# - The repository's .git directory is bind-mounted for the duration of the step.
# - `curl -f` makes an HTTP error fail the step instead of saving the error
#   page as sccache.tar.gz.
# - The binary is moved with plain `mv`: no USER instruction precedes this
#   step, so it runs as the image's default user (root for this base image),
#   and `sudo` is not needed.
# - The SCCACHE_* / CMAKE_BUILD_TYPE exports only exist inside this RUN shell.
# - `sccache --show-stats` is printed before and after the wheel build.
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,source=.git,target=.git \
    if [ "$USE_SCCACHE" = "1" ]; then \
        echo "Installing sccache..." \
        && curl -fL -o sccache.tar.gz https://github.com/mozilla/sccache/releases/download/v0.8.1/sccache-v0.8.1-x86_64-unknown-linux-musl.tar.gz \
        && tar -xzf sccache.tar.gz \
        && mv sccache-v0.8.1-x86_64-unknown-linux-musl/sccache /usr/bin/sccache \
        && rm -rf sccache.tar.gz sccache-v0.8.1-x86_64-unknown-linux-musl \
        && export SCCACHE_BUCKET=${SCCACHE_BUCKET_NAME} \
        && export SCCACHE_REGION=${SCCACHE_REGION_NAME} \
        && export SCCACHE_S3_NO_CREDENTIALS=${SCCACHE_S3_NO_CREDENTIALS} \
        && export SCCACHE_IDLE_TIMEOUT=0 \
        && export CMAKE_BUILD_TYPE=Release \
        && sccache --show-stats \
        && python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38 \
        && sccache --show-stats; \
    fi

# Build the wheel without sccache: the step is a no-op when USE_SCCACHE is "1"
# (the sccache step has already produced the wheel in that case). CCACHE_DIR
# points at the cache mount below so it persists between builds.
ENV CCACHE_DIR=/root/.cache/ccache
RUN --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,source=.git,target=.git \
    test "$USE_SCCACHE" = "1" \
    || python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38

# Check the size of the wheel if RUN_WHEEL_CHECK is true
COPY .buildkite/check-wheel-size.py check-wheel-size.py
# Default max size of the wheel is 250MB
ARG VLLM_MAX_SIZE_MB=250
ENV VLLM_MAX_SIZE_MB=$VLLM_MAX_SIZE_MB
ARG RUN_WHEEL_CHECK=true
# Only the exact value "true" runs the check; anything else skips it.
RUN case "$RUN_WHEEL_CHECK" in \
        true) python3 check-wheel-size.py dist ;; \
        *) echo "Skipping wheel size check." ;; \
    esac
#################### WHEEL BUILD IMAGE ####################

#################### DEV IMAGE ####################
# Development image: the `base` stage plus the lint/test/dev requirements.
# `AS` is upper-case to match the other stages in this file.
FROM base AS dev

# All three files are copied, but pip is only pointed at requirements-dev.txt
# (presumably it includes the lint and test files -- verify against that file).
COPY requirements-lint.txt requirements-test.txt requirements-dev.txt ./
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install -r requirements-dev.txt

#################### DEV IMAGE ####################
#################### vLLM installation IMAGE ####################
# image with vLLM installed
FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu20.04 AS vllm-base
# Declared again inside the stage so the RUN steps below can read them.
ARG CUDA_VERSION=12.4.1
ARG PYTHON_VERSION=3.12
WORKDIR /vllm-workspace
ENV DEBIAN_FRONTEND=noninteractive

# Record PYTHON_VERSION with its dots removed (3.12 -> 312) in
# /etc/environment; a later step sources that file to build a wheel file name.
RUN echo "export PYTHON_VERSION_STR=$(echo "${PYTHON_VERSION}" | tr -d '.')" >> /etc/environment

# Install Python and other dependencies.
# - tzdata is preseeded through debconf so its configuration never prompts.
# - python${PYTHON_VERSION} comes from the deadsnakes PPA and is registered as
#   the default `python3` via update-alternatives.
# - get-pip.py is fetched with `curl -f` into a file and then executed, instead
#   of being piped straight into the interpreter: the RUN shell does not set
#   pipefail, so a failed download inside a pipe would not fail at the curl step.
RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
    && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
    && apt-get update -y \
    && apt-get install -y ccache software-properties-common git curl sudo vim python3-pip \
    && apt-get install -y ffmpeg libsm6 libxext6 libgl1 \
    && add-apt-repository ppa:deadsnakes/ppa \
    && apt-get update -y \
    && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv libibverbs-dev \
    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
    && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \
    && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \
    && curl -fsS -o /tmp/get-pip.py https://bootstrap.pypa.io/get-pip.py \
    && python${PYTHON_VERSION} /tmp/get-pip.py \
    && rm -f /tmp/get-pip.py \
    && python3 --version && python3 -m pip --version

# Workaround for https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
# this won't be needed for future versions of this docker image
# or future versions of triton.
# `cut -d. -f1,2` keeps the first two dot-separated fields of CUDA_VERSION, so
# the default 12.4.1 selects /usr/local/cuda-12.4/compat/.
RUN ldconfig "/usr/local/cuda-$(echo "$CUDA_VERSION" | cut -d. -f1,2)/compat/"

# install vllm wheel first, so that torch etc will be installed
# The wheel directory is bind-mounted from the `build` stage for this step
# only; it is not copied into a layer of this image.
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
    --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install --verbose dist/*.whl

# /etc/environment is sourced to obtain PYTHON_VERSION_STR, which fills in the
# cpXY tags of the flashinfer wheel file name.
RUN --mount=type=cache,target=/root/.cache/pip \
    . /etc/environment \
    && python3 -m pip install "https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.6/flashinfer-0.1.6+cu121torch2.4-cp${PYTHON_VERSION_STR}-cp${PYTHON_VERSION_STR}-linux_x86_64.whl"
#################### vLLM installation IMAGE ####################

#################### TEST IMAGE ####################
# image to run unit testing suite
# note that this uses vllm installed by `pip`
FROM vllm-base AS test

# Plain copy of the build context; COPY is used because none of ADD's extra
# behaviours (archive extraction, URL fetch) are wanted here.
COPY . /vllm-workspace/

# install development dependencies (for testing)
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install -r requirements-dev.txt

# doc requires source code
# we hide them inside `test_docs/` , so that this source code
# will not be imported by other tests
RUN mkdir test_docs \
    && mv docs vllm test_docs/

#################### TEST IMAGE ####################

#################### OPENAI API SERVER ####################
# openai api server alternative
FROM vllm-base AS vllm-openai

# install additional dependencies for openai api server
# Every requirement that carries a version specifier is quoted: unquoted, the
# shell reads `bitsandbytes>=0.44.0` as the word `bitsandbytes` plus a
# redirection of stdout to a file named `=0.44.0`, dropping the constraint.
# `python3 -m pip` is used, as in the rest of this file, so the packages are
# installed for the interpreter that the entrypoint runs.
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.44.0' 'timm==0.9.10'

# Set the usage source for containers started from this image.
# Written in key=value form; the space-separated `ENV key value` form is legacy.
ENV VLLM_USAGE_SOURCE=production-docker-image

# Exec-form entrypoint: arguments given to `docker run` are appended to the
# api_server module invocation.
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
#################### OPENAI API SERVER ####################