Dockerfile 8.74 KB
Newer Older
Simon Mo's avatar
Simon Mo committed
1
2
3
# The vLLM Dockerfile is used to construct the vLLM image that can be used
# directly to run the OpenAI-compatible server.

4
5
6
7
# Please reflect any changes made here in
# docs/source/dev/dockerfile/dockerfile.rst and
# docs/source/assets/dev/dockerfile-stages-dependency.png

8
# Global build argument (declared before the first FROM): selects the tag of
# the nvidia/cuda images referenced by the FROM lines of this file.
ARG CUDA_VERSION=12.4.1
Simon Mo's avatar
Simon Mo committed
9
#################### BASE BUILD IMAGE ####################
# prepare basic build environment
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04 AS base

# Declared inside the stage so the RUN instructions below can reference them.
ARG CUDA_VERSION=12.4.1
ARG PYTHON_VERSION=3.10

ENV DEBIAN_FRONTEND=noninteractive

# Preseed the tzdata debconf answers, then install the requested Python from
# the deadsnakes PPA and register it as the `python3` alternative.
RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
    && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
    && apt-get update -y \
    && apt-get install -y ccache software-properties-common \
    && add-apt-repository ppa:deadsnakes/ppa \
    && apt-get update -y \
    && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv \
    && if [ "${PYTHON_VERSION}" != "3" ]; then update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1; fi \
    && python3 --version

RUN apt-get update -y \
    && apt-get install -y git curl sudo

# Install pip s.t. it will be compatible with our PYTHON_VERSION.
# The installer is downloaded to a file (with --fail) instead of being piped
# into python: in a pipe, a failed download would be masked by the exit status
# of the interpreter reading an empty script.
RUN curl -fsS -o /tmp/get-pip.py https://bootstrap.pypa.io/get-pip.py \
    && python${PYTHON_VERSION} /tmp/get-pip.py \
    && rm -f /tmp/get-pip.py
RUN python3 -m pip --version

# Workaround for https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
# this won't be needed for future versions of this docker image
# or future versions of triton.
RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/

WORKDIR /workspace

# install build and runtime dependencies
COPY requirements-common.txt requirements-common.txt
COPY requirements-adag.txt requirements-adag.txt
COPY requirements-cuda.txt requirements-cuda.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install -r requirements-cuda.txt

# NOTE(review): `packaging` is installed in its own step before the mamba
# requirements, presumably because they need it at build time -- confirm.
# Both steps use the same pip cache mount as the other pip installs.
COPY requirements-mamba.txt requirements-mamba.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install packaging
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install -r requirements-mamba.txt

# cuda arch list used by torch
# can be useful for both `dev` and `test`
# explicitly set the list to avoid issues with torch 2.2
# see https://github.com/pytorch/pytorch/pull/123243
ARG torch_cuda_arch_list='7.0 7.5 8.0 8.6 8.9 9.0+PTX'
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
#################### BASE BUILD IMAGE ####################

62
#################### WHEEL BUILD IMAGE ####################
# stage that compiles the vLLM wheel into /workspace/dist
FROM base AS build

ARG PYTHON_VERSION=3.10

# install build dependencies
COPY requirements-build.txt requirements-build.txt

RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install -r requirements-build.txt

# The compiler cache (ccache) used to speed up compilation is already
# installed by the `base` stage this stage derives from, so it is not
# installed again here.

# files and directories related to build wheels
COPY csrc csrc
COPY setup.py setup.py
COPY cmake cmake
COPY CMakeLists.txt CMakeLists.txt
COPY requirements-common.txt requirements-common.txt
COPY requirements-adag.txt requirements-adag.txt
COPY requirements-cuda.txt requirements-cuda.txt
COPY pyproject.toml pyproject.toml
COPY vllm vllm

# max jobs used by Ninja to build extensions
ARG max_jobs=2
ENV MAX_JOBS=${max_jobs}
# number of threads used by nvcc
ARG nvcc_threads=8
ENV NVCC_THREADS=$nvcc_threads

ARG buildkite_commit
ENV BUILDKITE_COMMIT=${buildkite_commit}

ARG USE_SCCACHE
# if USE_SCCACHE is set, use sccache to speed up compilation
# - curl uses --fail so an HTTP error aborts the step at the download itself.
# - no sudo: this stage already runs its package installs without it.
# NOTE(review): CUDA_VERSION is not re-declared as an ARG in this stage;
# presumably it is inherited from the parent stage / base image -- confirm.
RUN --mount=type=cache,target=/root/.cache/pip \
    if [ "$USE_SCCACHE" = "1" ]; then \
        echo "Installing sccache..." \
        && curl -fL -o sccache.tar.gz https://github.com/mozilla/sccache/releases/download/v0.8.1/sccache-v0.8.1-x86_64-unknown-linux-musl.tar.gz \
        && tar -xzf sccache.tar.gz \
        && mv sccache-v0.8.1-x86_64-unknown-linux-musl/sccache /usr/bin/sccache \
        && rm -rf sccache.tar.gz sccache-v0.8.1-x86_64-unknown-linux-musl \
        && if [ "$CUDA_VERSION" = "11.8.0" ]; then \
            export SCCACHE_BUCKET=vllm-build-sccache-2; \
           else \
            export SCCACHE_BUCKET=vllm-build-sccache; \
           fi \
        && export SCCACHE_REGION=us-west-2 \
        && export CMAKE_BUILD_TYPE=Release \
        && sccache --show-stats \
        && python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38 \
        && sccache --show-stats; \
    fi

# Fallback path: when USE_SCCACHE is not "1", build the wheel with the ccache
# directory kept on a cache mount.
ENV CCACHE_DIR=/root/.cache/ccache
RUN --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=cache,target=/root/.cache/pip \
    if [ "$USE_SCCACHE" != "1" ]; then \
        python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38; \
    fi

# check the size of the wheel, we cannot upload wheels larger than 100MB
COPY .buildkite/check-wheel-size.py check-wheel-size.py
RUN python3 check-wheel-size.py dist

Simon Mo's avatar
Simon Mo committed
129
#################### WHEEL BUILD IMAGE ####################
Stephen Krider's avatar
Stephen Krider committed
130

131
132
133
134
135
136
137
138
139
140
#################### DEV IMAGE ####################
# development image: `base` plus the packages from requirements-dev.txt
# NOTE(review): the lint and test requirement files are copied but not
# installed directly; presumably requirements-dev.txt includes them -- confirm.
FROM base AS dev

COPY requirements-lint.txt requirements-lint.txt
COPY requirements-test.txt requirements-test.txt
COPY requirements-dev.txt requirements-dev.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install -r requirements-dev.txt

#################### DEV IMAGE ####################
Mor Zusman's avatar
Mor Zusman committed
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#################### MAMBA Build IMAGE ####################
# stage that produces wheels for the packages in requirements-mamba.txt
FROM dev AS mamba-builder
# max jobs used for build
ARG max_jobs=2
ENV MAX_JOBS=${max_jobs}

# `pip wheel` writes into the current directory by default, so the wheels end
# up in this WORKDIR.
WORKDIR /usr/src/mamba

COPY requirements-mamba.txt requirements-mamba.txt

# Download the wheel or build it if a pre-compiled release doesn't exist
# (invoked as `python3 -m pip`, like the other pip steps in this file)
RUN python3 -m pip --verbose wheel -r requirements-mamba.txt \
    --no-build-isolation --no-deps --no-cache-dir

#################### MAMBA Build IMAGE ####################
156

157
158
#################### vLLM installation IMAGE ####################
# image with vLLM installed
FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu20.04 AS vllm-base
# Declared inside the stage so the RUN instructions below can reference them.
ARG CUDA_VERSION=12.4.1
ARG PYTHON_VERSION=3.10
WORKDIR /vllm-workspace

# Preseed the tzdata debconf answers, then install the requested Python from
# the deadsnakes PPA and register it as the `python3` alternative.
# The apt package lists are removed in the same layer to keep the image small.
RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
    && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
    && apt-get update -y \
    && apt-get install -y ccache software-properties-common \
    && add-apt-repository ppa:deadsnakes/ppa \
    && apt-get update -y \
    && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv \
    && if [ "${PYTHON_VERSION}" != "3" ]; then update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1; fi \
    && python3 --version \
    && rm -rf /var/lib/apt/lists/*

RUN apt-get update -y \
    && apt-get install -y python3-pip git vim curl libibverbs-dev \
    && rm -rf /var/lib/apt/lists/*

# Install pip s.t. it will be compatible with our PYTHON_VERSION.
# The installer is downloaded to a file (with --fail) instead of being piped
# into python: in a pipe, a failed download would be masked by the exit status
# of the interpreter reading an empty script.
RUN curl -fsS -o /tmp/get-pip.py https://bootstrap.pypa.io/get-pip.py \
    && python${PYTHON_VERSION} /tmp/get-pip.py \
    && rm -f /tmp/get-pip.py
RUN python3 -m pip --version

# Workaround for https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
# this won't be needed for future versions of this docker image
# or future versions of triton.
RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/

# install vllm wheel first, so that torch etc will be installed
# (the wheel is bind-mounted from the `build` stage, not copied into a layer)
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
    --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install dist/*.whl --verbose

# install the wheels produced by the `mamba-builder` stage; pip's cache lives
# on the cache mount, so --no-cache-dir is not passed (it would bypass the mount)
RUN --mount=type=bind,from=mamba-builder,src=/usr/src/mamba,target=/usr/src/mamba \
    --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install /usr/src/mamba/*.whl

# NOTE(review): this wheel is tagged cp310 / cu121 / torch2.4 and does not
# follow PYTHON_VERSION or CUDA_VERSION -- confirm when changing either.
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.3/flashinfer-0.1.3+cu121torch2.4-cp310-cp310-linux_x86_64.whl
#################### vLLM installation IMAGE ####################
Stephen Krider's avatar
Stephen Krider committed
199
200


201
202
203
204
#################### TEST IMAGE ####################
# image to run unit testing suite
# note that this uses vllm installed by `pip`
FROM vllm-base AS test

# plain copy of the build context; COPY is used because none of ADD's extra
# behaviors (archive extraction, URL fetch) are needed here
COPY . /vllm-workspace/

# install development dependencies (for testing)
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install -r requirements-dev.txt

# doc requires source code
# we hide them inside `test_docs/` , so that this source code
# will not be imported by other tests
RUN mkdir test_docs \
    && mv docs test_docs/ \
    && mv vllm test_docs/

#################### TEST IMAGE ####################
Stephen Krider's avatar
Stephen Krider committed
220

Simon Mo's avatar
Simon Mo committed
221
#################### OPENAI API SERVER ####################
# openai api server alternative
FROM vllm-base AS vllm-openai

# install additional dependencies for openai api server
# (via `python3 -m pip` so they are installed for the same interpreter the
# ENTRYPOINT below runs)
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install accelerate hf_transfer 'modelscope!=1.15.0'

ENV VLLM_USAGE_SOURCE=production-docker-image

ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
#################### OPENAI API SERVER ####################