"vscode:/vscode.git/clone" did not exist on "a3a3ee4e6febe8c270fdec0765c844186a728079"
Dockerfile 7.67 KB
Newer Older
Simon Mo's avatar
Simon Mo committed
1
2
3
# The vLLM Dockerfile is used to construct a vLLM image that can be used
# directly to run the OpenAI-compatible server.

4
5
6
7
# Please update any changes made here to
# docs/source/dev/dockerfile/dockerfile.rst and
# docs/source/assets/dev/dockerfile-stages-dependency.png

8
# Global build argument: declared before the first FROM so it can be used in
# FROM lines. Stages that need the value inside RUN steps redeclare it.
ARG CUDA_VERSION=12.4.1
Simon Mo's avatar
Simon Mo committed
9
#################### BASE BUILD IMAGE ####################
10
# prepare basic build environment
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# Build-time base stage: CUDA "devel" image on Ubuntu 22.04.
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS base

# Redeclared so the value is visible to RUN steps inside this stage.
ARG CUDA_VERSION=12.4.1
# "3" keeps whatever python3 the distribution provides; any other value
# (e.g. 3.11) installs that interpreter and registers it as python3.
ARG PYTHON_VERSION=3

ENV DEBIAN_FRONTEND=noninteractive

# Preseed the tzdata answers so package installation never prompts, add the
# deadsnakes PPA, install the requested Python, and finally print the
# interpreter and pip versions as a sanity check.
# `add-apt-repository -y` answers its confirmation prompt explicitly instead of
# relying on stdin being closed during the build.
RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
    && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
    && apt-get update -y \
    && apt-get install -y ccache software-properties-common \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && apt-get update -y \
    && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv python3-pip \
    && if [ "${PYTHON_VERSION}" != "3" ]; then update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1; fi \
    && python3 --version \
    && python3 -m pip --version
Stephen Krider's avatar
Stephen Krider committed
28
29

# Extra system packages for the build stages; curl and sudo are used by the
# optional sccache step of the wheel build.
RUN apt-get update -y \
    && apt-get install -y \
        curl \
        git \
        python3-pip \
        sudo
Stephen Krider's avatar
Stephen Krider committed
31

32
33
34
35
# Workaround for https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
# this won't be needed for future versions of this docker image
# or future versions of triton.
36
# Point ldconfig at the compat directory of the CUDA <major>.<minor> toolkit;
# `cut -d. -f1,2` reduces e.g. 12.4.1 to 12.4.
RUN ldconfig "/usr/local/cuda-$(echo "$CUDA_VERSION" | cut -d. -f1,2)/compat/"
37

Stephen Krider's avatar
Stephen Krider committed
38
39
40
WORKDIR /workspace

# Install build and runtime dependencies. Only requirements-cuda.txt is passed
# to pip; requirements-common.txt is copied next to it (presumably because the
# CUDA file references it -- confirm).
COPY requirements-common.txt requirements-cuda.txt ./
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install --requirement requirements-cuda.txt
45

Mor Zusman's avatar
Mor Zusman committed
46
47
48
49
# Mamba requirements. `packaging` is installed in its own pip invocation before
# the requirements file (presumably because building those packages needs it
# -- confirm). Both installs share one layer and use the same pip cache mount
# as the other pip installs in this file.
COPY requirements-mamba.txt requirements-mamba.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install packaging \
    && python3 -m pip install -r requirements-mamba.txt

50
51
52
53
54
55
# cuda arch list used by torch
# can be useful for both `dev` and `test`
# explicitly set the list to avoid issues with torch 2.2
# see https://github.com/pytorch/pytorch/pull/123243
# The lower-case build arg can be overridden with --build-arg; the ENV line
# exports its value as TORCH_CUDA_ARCH_LIST so stages built FROM base inherit it.
ARG torch_cuda_arch_list='7.0 7.5 8.0 8.6 8.9 9.0+PTX'
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
Simon Mo's avatar
Simon Mo committed
56
57
#################### BASE BUILD IMAGE ####################

58
#################### WHEEL BUILD IMAGE ####################
59
60
61
FROM base AS build

ARG PYTHON_VERSION=3

# Python packages needed to build the wheel
COPY requirements-build.txt ./
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install --requirement requirements-build.txt
68

69
70
71
# install compiler cache to speed up compilation leveraging local or remote caching
# NOTE(review): the base stage's first apt-get step already installs ccache, so
# this is likely a no-op apart from refreshing the package index -- confirm
# before removing.
RUN apt-get update -y && apt-get install -y ccache

72
# Files and directories needed to build the wheel: the native sources and
# CMake helpers, then the individual build files, then the Python package.
COPY csrc/ csrc/
COPY cmake/ cmake/
COPY setup.py CMakeLists.txt pyproject.toml requirements-common.txt requirements-cuda.txt ./
COPY vllm/ vllm/
Stephen Krider's avatar
Stephen Krider committed
81
82

# Build tuning knobs, overridable with --build-arg:
#   max_jobs     - max jobs used by Ninja to build extensions
#   nvcc_threads - number of threads used by nvcc
ARG max_jobs=2
ARG nvcc_threads=8
# VLLM_INSTALL_PUNICA_KERNELS=1 makes sure punica kernels are built (for LoRA)
ENV MAX_JOBS=${max_jobs} \
    NVCC_THREADS=${nvcc_threads} \
    VLLM_INSTALL_PUNICA_KERNELS=1
90

youkaichao's avatar
youkaichao committed
91
92
93
# Commit identifier supplied at build time (presumably by the Buildkite CI --
# confirm). It has no default, so it is empty unless --build-arg sets it.
ARG buildkite_commit
ENV BUILDKITE_COMMIT=${buildkite_commit}

94
95
96
97
98
99
100
101
102
103
104
ARG USE_SCCACHE
# If USE_SCCACHE=1: download the pinned sccache v0.8.1 release, install it to
# /usr/bin, and build the wheel with the sccache bucket/region exported for the
# duration of this RUN only. Stats are printed before and after the build.
# - `curl -f` makes an HTTP error fail the step immediately instead of saving
#   the error page as sccache.tar.gz.
# - no `sudo`: this file sets no USER, so build steps already run as root.
RUN --mount=type=cache,target=/root/.cache/pip \
    if [ "$USE_SCCACHE" = "1" ]; then \
        echo "Installing sccache..." \
        && curl -fL -o sccache.tar.gz https://github.com/mozilla/sccache/releases/download/v0.8.1/sccache-v0.8.1-x86_64-unknown-linux-musl.tar.gz \
        && tar -xzf sccache.tar.gz \
        && mv sccache-v0.8.1-x86_64-unknown-linux-musl/sccache /usr/bin/sccache \
        && rm -rf sccache.tar.gz sccache-v0.8.1-x86_64-unknown-linux-musl \
        && export SCCACHE_BUCKET=vllm-build-sccache \
        && export SCCACHE_REGION=us-west-2 \
        && export CMAKE_BUILD_TYPE=Release \
        && sccache --show-stats \
        && python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38 \
        && sccache --show-stats; \
    fi

111
112
# Default build path, skipped when USE_SCCACHE=1. CCACHE_DIR points at a
# BuildKit cache mount so the compiler cache persists between builds.
ENV CCACHE_DIR=/root/.cache/ccache
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=cache,target=/root/.cache/ccache \
    [ "$USE_SCCACHE" = "1" ] \
    || python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38
117

118
119
120
121
# Run the wheel size check on dist/: wheels larger than 100MB cannot be uploaded.
COPY .buildkite/check-wheel-size.py ./
RUN python3 check-wheel-size.py dist

Simon Mo's avatar
Simon Mo committed
122
#################### WHEEL BUILD IMAGE ####################
Stephen Krider's avatar
Stephen Krider committed
123

124
125
126
127
128
129
130
131
132
133
#################### DEV IMAGE ####################
# Development image: the base stage plus the dev requirements.
# `AS` is upper-case to match the FROM keyword casing used elsewhere in this file.
FROM base AS dev

# Only requirements-dev.txt is passed to pip; the lint and test files are
# copied next to it (presumably because the dev file includes them -- confirm).
COPY requirements-lint.txt requirements-lint.txt
COPY requirements-test.txt requirements-test.txt
COPY requirements-dev.txt requirements-dev.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install -r requirements-dev.txt

#################### DEV IMAGE ####################
Mor Zusman's avatar
Mor Zusman committed
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#################### MAMBA Build IMAGE ####################
# `AS` is upper-case to match the FROM keyword casing used elsewhere in this file.
FROM dev AS mamba-builder
# max jobs used for build
ARG max_jobs=2
ENV MAX_JOBS=${max_jobs}

WORKDIR /usr/src/mamba

COPY requirements-mamba.txt requirements-mamba.txt

# Download the wheel or build it if a pre-compiled release doesn't exist.
# The resulting *.whl files are written to the working directory
# (/usr/src/mamba). pip is invoked as `python3 -m pip`, like the other pip
# steps in this file, so the python3 selected in the base stage is used.
RUN python3 -m pip --verbose wheel -r requirements-mamba.txt \
    --no-build-isolation --no-deps --no-cache-dir

#################### MAMBA Build IMAGE ####################
149

150
151
#################### vLLM installation IMAGE ####################
# image with vLLM installed
152
153
# Runtime stage: starts from the smaller CUDA "base" image, not the devel image.
FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu22.04 AS vllm-base
# Redeclared so the value is visible to RUN steps inside this stage.
ARG CUDA_VERSION=12.4.1
WORKDIR /vllm-workspace

# DEBIAN_FRONTEND is set inline so the install cannot stop on a debconf prompt
# and the variable does not persist into the runtime environment. The apt
# package lists are removed in the same layer to keep them out of the image.
RUN apt-get update -y \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y python3-pip git vim \
    && rm -rf /var/lib/apt/lists/*

# Workaround for https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
# this won't be needed for future versions of this docker image
# or future versions of triton.
163
# Point ldconfig at the compat directory of the CUDA <major>.<minor> toolkit;
# `cut -d. -f1,2` reduces e.g. 12.4.1 to 12.4.
RUN ldconfig "/usr/local/cuda-$(echo "$CUDA_VERSION" | cut -d. -f1,2)/compat/"
164
165
166
167

# Install the vLLM wheel first, so that torch etc. will be installed.
# The dist/ directory of the `build` stage is bind-mounted for this step only,
# so the wheel file itself is not stored in a layer of this image.
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
    python3 -m pip install --verbose dist/*.whl
Mor Zusman's avatar
Mor Zusman committed
169
170
171
172

# Install the wheels produced by the mamba-builder stage, bind-mounted for this
# step only. `--no-cache-dir` was dropped: it disabled the pip cache that the
# cache mount on this same step provides, so the mount had no effect.
RUN --mount=type=bind,from=mamba-builder,src=/usr/src/mamba,target=/usr/src/mamba \
    --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install /usr/src/mamba/*.whl
173
174
175

# Install a prebuilt flashinfer wheel. The file name pins the whole combination:
# flashinfer 0.0.8, cu121, torch 2.3, CPython 3.10 (cp310), linux x86_64.
# NOTE(review): cu121/cp310 are hard-coded while CUDA_VERSION is a build arg --
# confirm this wheel still matches when the build args are changed.
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.8/flashinfer-0.0.8+cu121torch2.3-cp310-cp310-linux_x86_64.whl
176
#################### vLLM installation IMAGE ####################
Stephen Krider's avatar
Stephen Krider committed
177
178


179
180
181
182
#################### TEST IMAGE ####################
# image to run unit testing suite
# note that this uses vllm installed by `pip`
FROM vllm-base AS test

# Copy the whole build context into the workspace. COPY replaces ADD here: this
# is a plain local copy and needs none of ADD's extra behaviours (URL fetching,
# automatic extraction of a local tar archive).
COPY . /vllm-workspace/
Stephen Krider's avatar
Stephen Krider committed
185

186
# Development dependencies, needed to run the tests
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install --requirement requirements-dev.txt
189

190
191
192
193
194
195
# The docs need the source code. Both are moved under `test_docs/` so that the
# source tree in the workspace is not imported by the other tests.
RUN mkdir test_docs \
    && mv docs vllm test_docs/
Stephen Krider's avatar
Stephen Krider committed
196

197
#################### TEST IMAGE ####################
Stephen Krider's avatar
Stephen Krider committed
198

Simon Mo's avatar
Simon Mo committed
199
#################### OPENAI API SERVER ####################
Stephen Krider's avatar
Stephen Krider committed
200
201
# openai api server alternative
FROM vllm-base AS vllm-openai

# Install additional dependencies for the openai api server. pip is invoked as
# `python3 -m pip`, like the other pip steps in this file, so packages go to
# the same interpreter the ENTRYPOINT runs. modelscope 1.15.0 is excluded.
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install accelerate hf_transfer 'modelscope!=1.15.0'
Stephen Krider's avatar
Stephen Krider committed
206

yhu422's avatar
yhu422 committed
207
208
# key=value form; the space-separated `ENV key value` form is legacy syntax.
ENV VLLM_USAGE_SOURCE=production-docker-image

# Exec (JSON array) form: the server is started directly, without a wrapping
# shell, and `docker run` arguments are appended to this command line.
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
Simon Mo's avatar
Simon Mo committed
210
#################### OPENAI API SERVER ####################