# The vLLM Dockerfile is used to construct a vLLM image that can be directly
# used to run the OpenAI-compatible server.

# Please mirror any changes made here in
# docs/source/dev/dockerfile/dockerfile.rst and
# docs/source/assets/dev/dockerfile-stages-dependency.png

8
# Global default; only visible to FROM lines until a stage re-declares it.
ARG CUDA_VERSION=12.4.1
#################### BASE BUILD IMAGE ####################
# Common build environment shared by the later `FROM base` stages.
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04 AS base
# Re-declared inside the stage so RUN instructions can read them.
ARG CUDA_VERSION=12.4.1
ARG PYTHON_VERSION=3.12
ENV DEBIAN_FRONTEND=noninteractive

# Toolchain + Python setup, all in one layer:
#   1. preseed the tzdata debconf answers
#   2. install basic tools, then add the deadsnakes PPA
#   3. install python${PYTHON_VERSION} and make it the `python3` alternative
#   4. bootstrap pip for that interpreter and print both versions
RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
    && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
    && apt-get update -y \
    && apt-get install -y \
        ccache \
        curl \
        git \
        software-properties-common \
        sudo \
    && add-apt-repository ppa:deadsnakes/ppa \
    && apt-get update -y \
    && apt-get install -y \
        python${PYTHON_VERSION} \
        python${PYTHON_VERSION}-dev \
        python${PYTHON_VERSION}-venv \
    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
    && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \
    && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \
    && curl -sS https://bootstrap.pypa.io/get-pip.py | python${PYTHON_VERSION} \
    && python3 --version \
    && python3 -m pip --version
Stephen Krider's avatar
Stephen Krider committed
29

30
31
32
33
34
35
36
37
# Upgrade to GCC 10 to avoid https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92519
# as it was causing spam when compiling the CUTLASS kernels.
#
# `apt-get update` runs in the same layer as the install so this step never
# depends on package lists left behind by an older, cached layer. gcc-10 is
# then registered as the `gcc` alternative (with g++-10 as its slave link) and
# the resulting compiler version is printed as a sanity check.
RUN apt-get update -y \
    && apt-get install -y gcc-10 g++-10 \
    && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 110 --slave /usr/bin/g++ g++ /usr/bin/g++-10 \
    && gcc --version

38
39
40
41
# Run ldconfig on the `compat/` directory of the CUDA install whose name
# matches the major.minor part of CUDA_VERSION.
# This works around https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully it can be
# dropped with future versions of this docker image or of triton.
RUN ldconfig "/usr/local/cuda-$(echo "$CUDA_VERSION" | cut -d. -f1-2)/compat/"
43

Stephen Krider's avatar
Stephen Krider committed
44
45
46
WORKDIR /workspace

# Install build and runtime dependencies.
# Both requirement files are copied; only requirements-cuda.txt is passed to
# pip (presumably it pulls in requirements-common.txt -- confirm in that file).
COPY requirements-common.txt requirements-cuda.txt ./
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install --requirement requirements-cuda.txt
51

Mor Zusman's avatar
Mor Zusman committed
52

53
54
55
56
57
58
# CUDA arch list used by torch; can be useful for both `dev` and `test`.
# It is set explicitly to avoid issues with torch 2.2,
# see https://github.com/pytorch/pytorch/pull/123243
ARG torch_cuda_arch_list="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
# flash-attn gets its own, shorter arch list to reduce the binary size.
ARG vllm_fa_cmake_gpu_arches="80-real;90-real"
ENV VLLM_FA_CMAKE_GPU_ARCHES=${vllm_fa_cmake_gpu_arches}
Simon Mo's avatar
Simon Mo committed
62
63
#################### BASE BUILD IMAGE ####################

64
#################### WHEEL BUILD IMAGE ####################
65
66
FROM base AS build

# Build-only Python dependencies, installed before the sources are copied.
COPY requirements-build.txt ./
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install --requirement requirements-build.txt
72

73
# Files and directories needed to build the wheel.
# Directories are copied one per instruction; the individual top-level files
# are copied together into the working directory. `vllm` is copied last.
COPY csrc csrc
COPY cmake cmake
COPY CMakeLists.txt \
     README.md \
     pyproject.toml \
     requirements-common.txt \
     requirements-cuda.txt \
     setup.py \
     ./
COPY vllm vllm
Stephen Krider's avatar
Stephen Krider committed
83
84

# Build parallelism, overridable with --build-arg:
#   max_jobs     -> MAX_JOBS      (max jobs used by Ninja to build extensions)
#   nvcc_threads -> NVCC_THREADS  (number of threads used by nvcc)
ARG max_jobs=2
ARG nvcc_threads=8
ENV MAX_JOBS=${max_jobs} \
    NVCC_THREADS=${nvcc_threads}

# sccache settings; they are only read by the build step guarded by
# USE_SCCACHE=1. USE_SCCACHE has no default, so it is empty unless supplied.
ARG USE_SCCACHE
ARG SCCACHE_BUCKET_NAME=vllm-build-sccache
ARG SCCACHE_REGION_NAME=us-west-2
ARG SCCACHE_S3_NO_CREDENTIALS=0
95
96
# Wheel build, sccache flavour: does nothing unless USE_SCCACHE is "1".
# Downloads the sccache v0.8.1 release binary into /usr/bin, exports the
# SCCACHE_* settings from the build args, then builds the wheel into dist/,
# printing sccache statistics before and after the build.
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,source=.git,target=.git \
    if [ "$USE_SCCACHE" = "1" ]; then \
        echo "Installing sccache..." \
        && sccache_release=sccache-v0.8.1-x86_64-unknown-linux-musl \
        && curl -L -o sccache.tar.gz "https://github.com/mozilla/sccache/releases/download/v0.8.1/${sccache_release}.tar.gz" \
        && tar -xzf sccache.tar.gz \
        && sudo mv "${sccache_release}/sccache" /usr/bin/sccache \
        && rm -rf sccache.tar.gz "${sccache_release}" \
        && export \
            SCCACHE_BUCKET=${SCCACHE_BUCKET_NAME} \
            SCCACHE_REGION=${SCCACHE_REGION_NAME} \
            SCCACHE_S3_NO_CREDENTIALS=${SCCACHE_S3_NO_CREDENTIALS} \
            SCCACHE_IDLE_TIMEOUT=0 \
            CMAKE_BUILD_TYPE=Release \
        && sccache --show-stats \
        && python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38 \
        && sccache --show-stats; \
    fi

114
115
# Wheel build, default flavour: runs whenever USE_SCCACHE is not "1".
# CCACHE_DIR points at the cache mount below so ccache data survives between
# builds.
ENV CCACHE_DIR=/root/.cache/ccache
RUN --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,source=.git,target=.git \
    [ "$USE_SCCACHE" = "1" ] \
    || python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38
121

122
# Wheel size gate: runs check-wheel-size.py against dist/ only when
# RUN_WHEEL_CHECK is exactly "true"; any other value skips the check.
COPY .buildkite/check-wheel-size.py check-wheel-size.py
# Default max size of the wheel is 250MB; exported to the environment
# (presumably read by check-wheel-size.py -- confirm in that script).
ARG VLLM_MAX_SIZE_MB=250
ENV VLLM_MAX_SIZE_MB=${VLLM_MAX_SIZE_MB}
ARG RUN_WHEEL_CHECK=true
RUN case "$RUN_WHEEL_CHECK" in \
        true) python3 check-wheel-size.py dist ;; \
        *) echo "Skipping wheel size check." ;; \
    esac
Simon Mo's avatar
Simon Mo committed
133
#################### WHEEL BUILD IMAGE ####################
Stephen Krider's avatar
Stephen Krider committed
134

135
136
137
138
139
140
141
142
143
144
#################### DEV IMAGE ####################
# Development image: the shared `base` environment plus the development
# requirements. `AS` is uppercase to match the other stages in this file.
FROM base AS dev

# All three requirement files are copied, but only requirements-dev.txt is
# handed to pip (presumably it includes the lint and test files -- confirm in
# requirements-dev.txt).
COPY requirements-lint.txt requirements-lint.txt
COPY requirements-test.txt requirements-test.txt
COPY requirements-dev.txt requirements-dev.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install -r requirements-dev.txt

#################### DEV IMAGE ####################
145
146
#################### vLLM installation IMAGE ####################
# image with vLLM installed
147
FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu22.04 AS vllm-base
# Re-declared inside the stage so RUN instructions can read them.
ARG CUDA_VERSION=12.4.1
ARG PYTHON_VERSION=3.12
WORKDIR /vllm-workspace
ENV DEBIAN_FRONTEND=noninteractive

# Persist the Python version with its dots removed (e.g. 3.12 -> 312) as an
# `export` line in /etc/environment, so a later step can source it.
RUN PYTHON_VERSION_STR=$(echo ${PYTHON_VERSION} | tr -d '.') \
    && echo "export PYTHON_VERSION_STR=${PYTHON_VERSION_STR}" >> /etc/environment
155

156
# Runtime tools + Python setup, all in one layer:
#   1. preseed the tzdata debconf answers
#   2. install basic tools and the media/graphics libraries
#   3. add the deadsnakes PPA and install python${PYTHON_VERSION}
#   4. make that interpreter the `python3` alternative, bootstrap pip for it
#      and print both versions
RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
    && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
    && apt-get update -y \
    && apt-get install -y \
        ccache \
        curl \
        git \
        python3-pip \
        software-properties-common \
        sudo \
        vim \
    && apt-get install -y \
        ffmpeg \
        libgl1 \
        libsm6 \
        libxext6 \
    && add-apt-repository ppa:deadsnakes/ppa \
    && apt-get update -y \
    && apt-get install -y \
        libibverbs-dev \
        python${PYTHON_VERSION} \
        python${PYTHON_VERSION}-dev \
        python${PYTHON_VERSION}-venv \
    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
    && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \
    && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \
    && curl -sS https://bootstrap.pypa.io/get-pip.py | python${PYTHON_VERSION} \
    && python3 --version \
    && python3 -m pip --version
170
171
172
173
174

# Run ldconfig on the `compat/` directory of the CUDA install whose name
# matches the major.minor part of CUDA_VERSION.
# This works around https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully it can be
# dropped with future versions of this docker image or of triton.
RUN ldconfig "/usr/local/cuda-$(echo "$CUDA_VERSION" | cut -d. -f1-2)/compat/"
176
177
178
179

# Install the vLLM wheel first, so that torch etc. get installed with it.
# The wheel is bind-mounted from the `build` stage rather than copied in.
RUN --mount=type=bind,from=build,source=/workspace/dist,target=/vllm-workspace/dist \
    --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install --verbose dist/*.whl

# Install the prebuilt flashinfer v0.1.6 wheel. The interpreter tag in the
# file name comes from PYTHON_VERSION_STR, loaded from /etc/environment.
RUN --mount=type=cache,target=/root/.cache/pip \
    . /etc/environment \
    && py_tag="cp${PYTHON_VERSION_STR}" \
    && python3 -m pip install "https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.6/flashinfer-0.1.6+cu121torch2.4-${py_tag}-${py_tag}-linux_x86_64.whl"
COPY examples examples
186
#################### vLLM installation IMAGE ####################
Stephen Krider's avatar
Stephen Krider committed
187
188


189
190
191
192
#################### TEST IMAGE ####################
# image to run unit testing suite
# note that this uses vllm installed by `pip`
FROM vllm-base AS test

# Bring the whole build context into the workspace. COPY is used rather than
# ADD because this is a plain local copy; ADD's extra behaviours (remote
# URLs, archive extraction) are not wanted here.
COPY . /vllm-workspace/

# install development dependencies (for testing)
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install -r requirements-dev.txt

# The docs build requires the source code. Both `docs` and the `vllm` source
# tree are moved under `test_docs/` so that this copy of the source is not
# imported by other tests (they use the pip-installed vllm instead).
RUN mkdir test_docs \
    && mv docs vllm test_docs/
Stephen Krider's avatar
Stephen Krider committed
206

207
#################### TEST IMAGE ####################
Stephen Krider's avatar
Stephen Krider committed
208

Simon Mo's avatar
Simon Mo committed
209
#################### OPENAI API SERVER ####################
Stephen Krider's avatar
Stephen Krider committed
210
211
# openai api server alternative
FROM vllm-base AS vllm-openai

# Install additional dependencies for the OpenAI API server.
# Every requirement specifier that contains a shell metacharacter is quoted:
# an unquoted `bitsandbytes>=0.44.0` is parsed by the shell as
# `bitsandbytes` plus a redirection of stdout to a file named `=0.44.0`,
# which silently drops the version constraint.
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.44.0' timm==0.9.10

# Sets VLLM_USAGE_SOURCE for containers started from this image
# (key=value form; the space-separated ENV form is legacy).
ENV VLLM_USAGE_SOURCE=production-docker-image

# Exec-form entrypoint: the API server module is started directly by python3.
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
Simon Mo's avatar
Simon Mo committed
220
#################### OPENAI API SERVER ####################