Dockerfile 6.52 KB
Newer Older
Simon Mo's avatar
Simon Mo committed
1
2
3
# The vLLM Dockerfile is used to construct a vLLM image that can be used
# directly to run the OpenAI-compatible server.

4
5
6
7
# Please reflect any changes made here in
# docs/source/dev/dockerfile/dockerfile.rst and
# docs/source/assets/dev/dockerfile-stages-dependency.png

8
# CUDA toolkit version used to pick the nvidia/cuda base image tags.
# Declared before the first FROM so it can be referenced in FROM lines;
# stages that use the value in their own instructions redeclare it.
ARG CUDA_VERSION=12.4.1
Simon Mo's avatar
Simon Mo committed
9
#################### BASE BUILD IMAGE ####################
10
# prepare basic build environment
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS base

# Redeclared inside the stage so the value is usable by instructions here.
ARG CUDA_VERSION=12.4.1
# Python version suffix for the apt packages; with the default "3" the
# python3/python3-dev/python3-venv packages are installed and no
# update-alternatives entry is registered.
ARG PYTHON_VERSION=3

ENV DEBIAN_FRONTEND=noninteractive

# Preseed the tzdata answers, enable the deadsnakes PPA, install the requested
# Python and pip, then print the interpreter and pip versions as a sanity check.
# `-y` keeps add-apt-repository from waiting for confirmation.
RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
    && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
    && apt-get update -y \
    && apt-get install -y ccache software-properties-common \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && apt-get update -y \
    && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv python3-pip \
    && if [ "${PYTHON_VERSION}" != "3" ]; then update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1; fi \
    && python3 --version \
    && python3 -m pip --version
Stephen Krider's avatar
Stephen Krider committed
28
29

# Extra command-line tools for this stage and the stages derived from it.
# The package index is refreshed in the same layer as the install.
RUN apt-get update -y \
    && apt-get install -y python3-pip git curl sudo
Stephen Krider's avatar
Stephen Krider committed
31

32
33
34
35
# Workaround for https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
# this won't be needed for future versions of this docker image
# or future versions of triton.
# The compat directory name is built from the major.minor part of CUDA_VERSION.
RUN ldconfig "/usr/local/cuda-$(echo "${CUDA_VERSION}" | cut -d. -f1,2)/compat/"
37

Stephen Krider's avatar
Stephen Krider committed
38
39
40
WORKDIR /workspace

# install build and runtime dependencies
# NOTE(review): requirements-common.txt is copied but not installed directly;
# presumably requirements-cuda.txt includes it -- confirm.
COPY requirements-common.txt requirements-common.txt
COPY requirements-cuda.txt requirements-cuda.txt
# pip's download cache is kept in a BuildKit cache mount, not in the image.
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install -r requirements-cuda.txt
45
46
47
48
49
50
51

# CUDA architecture list consumed by torch.
# Handy for both the `dev` and `test` images.
# It is spelled out explicitly to avoid issues with torch 2.2,
# see https://github.com/pytorch/pytorch/pull/123243
ARG torch_cuda_arch_list="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
ENV TORCH_CUDA_ARCH_LIST="${torch_cuda_arch_list}"
Simon Mo's avatar
Simon Mo committed
52
53
#################### BASE BUILD IMAGE ####################

54
#################### WHEEL BUILD IMAGE ####################
55
56
57
# Stage that builds the vLLM wheel on top of the `base` stage.
FROM base AS build

# NOTE(review): PYTHON_VERSION is redeclared here but no instruction visible
# in this stage references it -- confirm whether it is still needed.
ARG PYTHON_VERSION=3
Stephen Krider's avatar
Stephen Krider committed
58

59
60
# install build dependencies
COPY requirements-build.txt requirements-build.txt

# pip's download cache is kept in a BuildKit cache mount, not in the image.
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install -r requirements-build.txt
64

65
66
67
# compiler cache: speeds up compilation by leveraging local or remote caching
RUN apt-get update -y \
    && apt-get install -y ccache

68
# files and directories needed to build the wheel
COPY csrc csrc
COPY setup.py setup.py
COPY cmake cmake
COPY CMakeLists.txt CMakeLists.txt
COPY requirements-common.txt requirements-common.txt
COPY requirements-cuda.txt requirements-cuda.txt
COPY pyproject.toml pyproject.toml
COPY vllm vllm
Stephen Krider's avatar
Stephen Krider committed
77
78

# max jobs used by Ninja to build extensions
ARG max_jobs=2
ENV MAX_JOBS=${max_jobs}
# number of threads used by nvcc
ARG nvcc_threads=8
ENV NVCC_THREADS=${nvcc_threads}
# make sure punica kernels are built (for LoRA)
ENV VLLM_INSTALL_PUNICA_KERNELS=1
86

87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# Build-time switch: when set to "1" the wheel is built in this step with
# sccache installed; otherwise this step does nothing.
ARG USE_SCCACHE
# Downloads the pinned sccache v0.8.1 release, installs the binary into
# /usr/bin, sets the sccache bucket/region for this step only, and builds the
# wheel into dist/, printing sccache statistics before and after.
# `curl -f` makes an HTTP error fail the step at the download itself.
# No sudo: the stage already runs apt-get without it, so it executes as root.
RUN --mount=type=cache,target=/root/.cache/pip \
    if [ "$USE_SCCACHE" = "1" ]; then \
        echo "Installing sccache..." \
        && curl -fL -o sccache.tar.gz https://github.com/mozilla/sccache/releases/download/v0.8.1/sccache-v0.8.1-x86_64-unknown-linux-musl.tar.gz \
        && tar -xzf sccache.tar.gz \
        && mv sccache-v0.8.1-x86_64-unknown-linux-musl/sccache /usr/bin/sccache \
        && rm -rf sccache.tar.gz sccache-v0.8.1-x86_64-unknown-linux-musl \
        && export SCCACHE_BUCKET=vllm-build-sccache \
        && export SCCACHE_REGION=us-west-2 \
        && sccache --show-stats \
        && python3 setup.py bdist_wheel --dist-dir=dist \
        && sccache --show-stats; \
    fi

103
104
# Default build path (USE_SCCACHE is not "1"): build the wheel into dist/.
# CCACHE_DIR points at the BuildKit cache mount below, so the ccache contents
# live in the mount rather than in an image layer.
ENV CCACHE_DIR=/root/.cache/ccache
RUN --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=cache,target=/root/.cache/pip \
    if [ "$USE_SCCACHE" != "1" ]; then \
        python3 setup.py bdist_wheel --dist-dir=dist; \
    fi
109

110
111
112
113
# check the size of the wheel, we cannot upload wheels larger than 100MB
# The checker script is copied from .buildkite/ and run against dist/, the
# directory the wheel build steps above write to.
COPY .buildkite/check-wheel-size.py check-wheel-size.py
RUN python3 check-wheel-size.py dist

Simon Mo's avatar
Simon Mo committed
114
#################### WHEEL BUILD IMAGE ####################
Stephen Krider's avatar
Stephen Krider committed
115

116
117
118
119
120
121
122
123
124
125
126
#################### DEV IMAGE ####################
# Development image: the `base` stage plus the development requirements.
FROM base AS dev

# NOTE(review): only requirements-dev.txt is installed; the lint and test
# files are presumably included from it -- confirm.
COPY requirements-lint.txt requirements-lint.txt
COPY requirements-test.txt requirements-test.txt
COPY requirements-dev.txt requirements-dev.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install -r requirements-dev.txt

#################### DEV IMAGE ####################

127
128
#################### vLLM installation IMAGE ####################
# image with vLLM installed
129
130
FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu22.04 AS vllm-base
# Redeclared inside the stage so the value is usable by instructions here.
ARG CUDA_VERSION=12.4.1
WORKDIR /vllm-workspace
132
133
134
135
136
137
138
139

# OS packages for the runtime image. The apt package lists are removed in the
# same layer so they do not add to the image size.
# NOTE(review): --no-install-recommends is deliberately not added; packages
# pulled in as recommendations of python3-pip may be relied on at runtime --
# confirm before slimming further.
RUN apt-get update -y \
    && apt-get install -y python3-pip git vim \
    && rm -rf /var/lib/apt/lists/*

# Workaround for https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
# this won't be needed for future versions of this docker image
# or future versions of triton.
# The compat directory name is built from the major.minor part of CUDA_VERSION.
RUN ldconfig "/usr/local/cuda-$(echo "${CUDA_VERSION}" | cut -d. -f1,2)/compat/"
141
142
143
144

# install vllm wheel first, so that torch etc will be installed
# The dist/ directory of the `build` stage is bind-mounted for this step only,
# so the wheel file itself is not copied into an image layer.
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
    --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install dist/*.whl --verbose
146
#################### vLLM installation IMAGE ####################
Stephen Krider's avatar
Stephen Krider committed
147
148


149
150
151
152
#################### TEST IMAGE ####################
# image to run unit testing suite
# note that this uses vllm installed by `pip`
FROM vllm-base AS test

# Copy the build context into the workspace. COPY is used because only a plain
# local copy is needed, not ADD's archive or URL handling.
COPY . /vllm-workspace/
Stephen Krider's avatar
Stephen Krider committed
155

156
# install development dependencies (for testing)
# pip's download cache is kept in a BuildKit cache mount, not in the image.
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install -r requirements-dev.txt
159

160
161
162
163
164
165
# doc requires source code
# we hide them inside `test_docs/` , so that this source code
# will not be imported by other tests
# Done as a single step so that it produces one layer instead of three.
RUN mkdir test_docs \
    && mv docs test_docs/ \
    && mv vllm test_docs/
Stephen Krider's avatar
Stephen Krider committed
166

167
#################### TEST IMAGE ####################
Stephen Krider's avatar
Stephen Krider committed
168

Simon Mo's avatar
Simon Mo committed
169
#################### OPENAI API SERVER ####################
Stephen Krider's avatar
Stephen Krider committed
170
171
# openai api server alternative
FROM vllm-base AS vllm-openai

# install additional dependencies for openai api server
# Uses `python3 -m pip`, like every other install step in this file, so the
# packages go to the same interpreter that the entrypoint runs.
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install accelerate hf_transfer modelscope
Stephen Krider's avatar
Stephen Krider committed
176

yhu422's avatar
yhu422 committed
177
178
# Sets the usage-source value for containers started from this image.
# Written in key=value form; the space-separated ENV form is legacy syntax.
ENV VLLM_USAGE_SOURCE=production-docker-image

# Exec-form entrypoint: arguments given to `docker run` are appended to the
# api_server module invocation.
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
Simon Mo's avatar
Simon Mo committed
180
#################### OPENAI API SERVER ####################