Dockerfile 8.9 KB
Newer Older
Simon Mo's avatar
Simon Mo committed
1
2
3
# The vLLM Dockerfile is used to construct a vLLM image that can be used
# directly to run the OpenAI-compatible server.

4
5
6
7
# Please update any changes made here to
# docs/source/dev/dockerfile/dockerfile.rst and
# docs/source/assets/dev/dockerfile-stages-dependency.png

8
# Default CUDA version. Declared before the first FROM so it can be
# interpolated into the base-image tags; stages that read it in RUN steps
# redeclare it after their own FROM.
ARG CUDA_VERSION=12.4.1
Simon Mo's avatar
Simon Mo committed
9
#################### BASE BUILD IMAGE ####################
10
# prepare basic build environment
11
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04 AS base
# Redeclared inside the stage so that RUN steps below can read them.
ARG CUDA_VERSION=12.4.1
ARG PYTHON_VERSION=3.10

# Keep apt/debconf from prompting during the build.
ENV DEBIAN_FRONTEND=noninteractive

16
# Install Python and other dependencies.
# tzdata is preseeded so its configuration step does not prompt; the requested
# Python comes from the deadsnakes PPA and is registered as the default
# `python3`; pip is bootstrapped with get-pip.py for that interpreter.
RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
    && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
    && apt-get update -y \
    && apt-get install -y \
        ccache \
        curl \
        git \
        software-properties-common \
        sudo \
    && add-apt-repository ppa:deadsnakes/ppa \
    && apt-get update -y \
    && apt-get install -y \
        python${PYTHON_VERSION} \
        python${PYTHON_VERSION}-dev \
        python${PYTHON_VERSION}-venv \
    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
    && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \
    && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \
    && curl -sS https://bootstrap.pypa.io/get-pip.py | python${PYTHON_VERSION} \
    && python3 --version \
    && python3 -m pip --version
Stephen Krider's avatar
Stephen Krider committed
29

30
31
32
33
# Workaround for https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960: register the
# /usr/local/cuda-<major>.<minor>/compat/ directory with the dynamic linker.
# Hopefully this won't be needed for future versions of this docker image
# or future versions of triton.
RUN ldconfig "/usr/local/cuda-$(echo "$CUDA_VERSION" | cut -d. -f1,2)/compat/"
35

Stephen Krider's avatar
Stephen Krider committed
36
37
38
WORKDIR /workspace

# install build and runtime dependencies
# Only requirements-cuda.txt is passed to pip; the other two files are copied
# next to it (presumably because it includes them -- verify against the file).
COPY requirements-common.txt requirements-adag.txt requirements-cuda.txt ./
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install --requirement requirements-cuda.txt
44

Mor Zusman's avatar
Mor Zusman committed
45
46
47
48
# Mamba requirements. `packaging` is installed in its own pip invocation
# before the requirements file (presumably because the mamba packages import
# it at build time -- confirm), so the two installs must stay ordered.
# Uses the same pip cache mount as the other pip steps in this file.
COPY requirements-mamba.txt requirements-mamba.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install packaging \
    && python3 -m pip install -r requirements-mamba.txt

49
50
51
52
53
54
# cuda arch list used by torch
# can be useful for both `dev` and `test`
# explicitly set the list to avoid issues with torch 2.2
# see https://github.com/pytorch/pytorch/pull/123243
# Override with `--build-arg torch_cuda_arch_list=...`; the value is exported
# as TORCH_CUDA_ARCH_LIST in this stage and in every stage built FROM it.
ARG torch_cuda_arch_list='7.0 7.5 8.0 8.6 8.9 9.0+PTX'
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
Simon Mo's avatar
Simon Mo committed
55
56
#################### BASE BUILD IMAGE ####################

57
#################### WHEEL BUILD IMAGE ####################
58
59
FROM base AS build

# install build dependencies
COPY requirements-build.txt ./
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install --requirement requirements-build.txt
65

66
# Files and directories needed to build the wheel.
# Directories are copied one per instruction; single files are grouped.
# The `vllm` package is copied last.
COPY csrc csrc
COPY cmake cmake
COPY setup.py CMakeLists.txt pyproject.toml ./
COPY requirements-common.txt requirements-adag.txt requirements-cuda.txt ./
COPY vllm vllm
Stephen Krider's avatar
Stephen Krider committed
76
77

# max jobs used by Ninja to build extensions
ARG max_jobs=2
ENV MAX_JOBS=$max_jobs

# number of threads used by nvcc
ARG nvcc_threads=8
ENV NVCC_THREADS=${nvcc_threads}

# exported as BUILDKITE_COMMIT; empty when the build arg is not supplied
ARG buildkite_commit
ENV BUILDKITE_COMMIT=$buildkite_commit

# sccache switch and bucket/region settings, read by the wheel build step below
ARG USE_SCCACHE
ARG SCCACHE_BUCKET_NAME=vllm-build-sccache
ARG SCCACHE_REGION_NAME=us-west-2
90
91
92
93
94
95
96
97
# if USE_SCCACHE is set to "1", download sccache v0.8.1, point it at the
# configured bucket/region and build the wheel, printing sccache statistics
# before and after the build. Otherwise this step is a no-op.
# The build runs as root, so the binary is moved into /usr/bin without sudo.
# `curl -f` makes an HTTP error fail the step instead of saving the error page.
RUN --mount=type=cache,target=/root/.cache/pip \
    if [ "$USE_SCCACHE" = "1" ]; then \
        echo "Installing sccache..." \
        && curl -fL -o sccache.tar.gz https://github.com/mozilla/sccache/releases/download/v0.8.1/sccache-v0.8.1-x86_64-unknown-linux-musl.tar.gz \
        && tar -xzf sccache.tar.gz \
        && mv sccache-v0.8.1-x86_64-unknown-linux-musl/sccache /usr/bin/sccache \
        && rm -rf sccache.tar.gz sccache-v0.8.1-x86_64-unknown-linux-musl \
        && export SCCACHE_BUCKET=${SCCACHE_BUCKET_NAME} \
        && export SCCACHE_REGION=${SCCACHE_REGION_NAME} \
        && export SCCACHE_IDLE_TIMEOUT=0 \
        && export CMAKE_BUILD_TYPE=Release \
        && sccache --show-stats \
        && python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38 \
        && sccache --show-stats; \
    fi

107
108
# Default path: unless USE_SCCACHE is "1", build the wheel here with the
# ccache directory and the pip cache mounted as build caches.
ENV CCACHE_DIR=/root/.cache/ccache
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=cache,target=/root/.cache/ccache \
    [ "$USE_SCCACHE" = "1" ] \
    || python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38
113

114
115
116
117
# check the size of the wheel, we cannot upload wheels larger than 100MB
# Runs the repository's .buildkite/check-wheel-size.py against the `dist`
# directory; a non-zero exit status from the script fails the build.
COPY .buildkite/check-wheel-size.py check-wheel-size.py
RUN python3 check-wheel-size.py dist

Simon Mo's avatar
Simon Mo committed
118
#################### WHEEL BUILD IMAGE ####################
Stephen Krider's avatar
Stephen Krider committed
119

120
121
122
123
124
125
126
127
128
129
#################### DEV IMAGE ####################
# Development image: the base build environment plus the dev requirements.
# `AS` is upper-case to match FROM and the other stages in this file.
FROM base AS dev

# Only requirements-dev.txt is passed to pip; the lint and test files are
# copied next to it (presumably because it includes them -- verify).
COPY requirements-lint.txt requirements-lint.txt
COPY requirements-test.txt requirements-test.txt
COPY requirements-dev.txt requirements-dev.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install -r requirements-dev.txt

#################### DEV IMAGE ####################
Mor Zusman's avatar
Mor Zusman committed
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#################### MAMBA Build IMAGE ####################
# `AS` is upper-case to match FROM and the other stages in this file.
FROM dev AS mamba-builder
# max jobs used for build
ARG max_jobs=2
ENV MAX_JOBS=${max_jobs}

WORKDIR /usr/src/mamba

COPY requirements-mamba.txt requirements-mamba.txt

# Download the wheel or build it if a pre-compiled release doesn't exist.
# The resulting *.whl files are written to the working directory
# (/usr/src/mamba), from where the vllm-base stage installs them.
# --no-deps: wheels are produced for the listed requirements only.
# --no-build-isolation: builds use the packages already installed in this image.
RUN pip --verbose wheel -r requirements-mamba.txt \
    --no-build-isolation --no-deps --no-cache-dir

#################### MAMBA Build IMAGE ####################
145

146
147
#################### vLLM installation IMAGE ####################
# image with vLLM installed
148
FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu20.04 AS vllm-base
# Redeclared inside the stage so that RUN steps below can read them.
ARG CUDA_VERSION=12.4.1
ARG PYTHON_VERSION=3.10

WORKDIR /vllm-workspace
152
153
154
155
# Keep apt/debconf from prompting during the build.
ENV DEBIAN_FRONTEND=noninteractive

# Strip the dots from PYTHON_VERSION (e.g. 3.10 -> 310) and append it to
# /etc/environment as an `export` line. A later RUN step in this stage
# sources that file to fill in the cpXY tag of a wheel URL.
RUN PYTHON_VERSION_STR=$(echo ${PYTHON_VERSION} | sed 's/\.//g') && \
    echo "export PYTHON_VERSION_STR=${PYTHON_VERSION_STR}" >> /etc/environment
156

157
# Install Python and other dependencies.
# tzdata is preseeded so its configuration step does not prompt; the requested
# Python comes from the deadsnakes PPA and is registered as the default
# `python3`; pip is bootstrapped with get-pip.py for that interpreter.
# The apt package index is removed at the end of this same layer so it is not
# shipped in the runtime image; run `apt-get update` before installing more
# packages on top of this image.
RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
    && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
    && apt-get update -y \
    && apt-get install -y ccache software-properties-common git curl sudo vim python3-pip \
    && add-apt-repository ppa:deadsnakes/ppa \
    && apt-get update -y \
    && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv libibverbs-dev \
    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
    && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \
    && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \
    && curl -sS https://bootstrap.pypa.io/get-pip.py | python${PYTHON_VERSION} \
    && python3 --version && python3 -m pip --version \
    && rm -rf /var/lib/apt/lists/*
170
171
172
173
174

# Workaround for https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960: register the
# /usr/local/cuda-<major>.<minor>/compat/ directory with the dynamic linker.
# Hopefully this won't be needed for future versions of this docker image
# or future versions of triton.
RUN ldconfig "/usr/local/cuda-$(echo "$CUDA_VERSION" | cut -d. -f1,2)/compat/"
176
177
178
179

# Install the vLLM wheel first, so that torch etc. will be installed.
# The wheel is read from the `build` stage's /workspace/dist through a bind
# mount rather than being copied into this image.
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
    python3 -m pip install --verbose dist/*.whl
Mor Zusman's avatar
Mor Zusman committed
181
182
183
184

# Install the wheels produced by the `mamba-builder` stage, read through a
# bind mount of its /usr/src/mamba directory.
# `--no-cache-dir` was dropped: it disabled the pip cache that this step
# mounts, so the cache mount had no effect.
RUN --mount=type=bind,from=mamba-builder,src=/usr/src/mamba,target=/usr/src/mamba \
    --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install /usr/src/mamba/*.whl
185
186

# Install the prebuilt FlashInfer v0.1.4 wheel whose cpXY tag matches this
# image's Python. PYTHON_VERSION_STR is read from /etc/environment, which an
# earlier step in this stage populated.
# NOTE(review): the wheel is tagged cu121/torch2.4 while CUDA_VERSION defaults
# to 12.4.1 -- confirm this pairing is intended.
RUN --mount=type=cache,target=/root/.cache/pip \
    . /etc/environment \
    && python3 -m pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.4/flashinfer-0.1.4+cu121torch2.4-cp${PYTHON_VERSION_STR}-cp${PYTHON_VERSION_STR}-linux_x86_64.whl
189
#################### vLLM installation IMAGE ####################
Stephen Krider's avatar
Stephen Krider committed
190
191


192
193
194
195
#################### TEST IMAGE ####################
# image to run unit testing suite
# note that this uses vllm installed by `pip`
FROM vllm-base AS test
Stephen Krider's avatar
Stephen Krider committed
196

197
# Copy the whole build context into the workspace. COPY is used instead of
# ADD because this is a plain local copy and needs none of ADD's extra
# behaviors (archive extraction, URL fetch).
COPY . /vllm-workspace/

# install development dependencies (for testing)
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install -r requirements-dev.txt
202

203
204
205
206
207
208
# doc requires source code
# we hide them inside `test_docs/` , so that this source code
# will not be imported by other tests
# Done in a single RUN so the moved `docs` and `vllm` trees are written to one
# layer instead of one layer per command.
RUN mkdir test_docs \
    && mv docs test_docs/ \
    && mv vllm test_docs/
Stephen Krider's avatar
Stephen Krider committed
209

210
#################### TEST IMAGE ####################
Stephen Krider's avatar
Stephen Krider committed
211

Simon Mo's avatar
Simon Mo committed
212
#################### OPENAI API SERVER ####################
Stephen Krider's avatar
Stephen Krider committed
213
214
# openai api server alternative
FROM vllm-base AS vllm-openai

# install additional dependencies for openai api server
# (`python3 -m pip` is used, like the other pip steps in this file, so the
# packages go to the same interpreter the entrypoint runs)
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install accelerate hf_transfer 'modelscope!=1.15.0'

# key=value form; the space-separated `ENV key value` form is legacy
ENV VLLM_USAGE_SOURCE=production-docker-image

# Exec-form entrypoint: arguments given to `docker run` are appended to it.
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
Simon Mo's avatar
Simon Mo committed
223
#################### OPENAI API SERVER ####################