Dockerfile 5.52 KB
Newer Older
Simon Mo's avatar
Simon Mo committed
1
2
3
# The vLLM Dockerfile is used to construct a vLLM image that can be directly
# used to run the OpenAI-compatible server.

4
5
6
7
# Please update any changes made here to
# docs/source/dev/dockerfile/dockerfile.rst and
# docs/source/assets/dev/dockerfile-stages-dependency.png

Simon Mo's avatar
Simon Mo committed
8
#################### BASE BUILD IMAGE ####################
# prepare basic build environment
FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 AS dev

# Base tooling for the build stages: pip installs the Python requirements,
# curl and sudo are used by the sccache install step in the `build` stage.
# The apt package lists are deleted in the same layer that downloaded them so
# they are not stored in the image; any later `apt-get install` must run its
# own `apt-get update` first.
# NOTE(review): recommended packages are deliberately left enabled -- confirm
# nothing relies on them before adding `--no-install-recommends`.
RUN apt-get update -y \
    && apt-get install -y python3-pip git curl sudo \
    && rm -rf /var/lib/apt/lists/*
Stephen Krider's avatar
Stephen Krider committed
14

15
16
17
18
# Workaround for https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
# this won't be needed for future versions of this docker image
# or future versions of triton.
# NOTE: the path is tied to the CUDA 12.4 base image chosen in this stage's
# FROM line; bump both together.
RUN ldconfig /usr/local/cuda-12.4/compat/
20

Stephen Krider's avatar
Stephen Krider committed
21
22
23
WORKDIR /workspace

# install build and runtime dependencies
# Only the requirements files are copied before installing, so edits to other
# files in the build context do not invalidate this layer.
# requirements-common.txt is copied but not installed directly here --
# presumably it is included from requirements-cuda.txt (TODO confirm).
COPY requirements-common.txt requirements-common.txt
COPY requirements-cuda.txt requirements-cuda.txt
# pip's download cache is kept in a BuildKit cache mount rather than in the
# image layer.
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install -r requirements-cuda.txt
28

Stephen Krider's avatar
Stephen Krider committed
29
# install development dependencies
# The lint and test requirement files are copied next to requirements-dev.txt
# but only requirements-dev.txt is passed to pip -- presumably it references
# the other two (TODO confirm).
COPY requirements-lint.txt requirements-lint.txt
COPY requirements-test.txt requirements-test.txt
COPY requirements-dev.txt requirements-dev.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install -r requirements-dev.txt
35
36
37
38
39
40
41

# CUDA architectures that torch compiles for.
# Declared in `dev` so every stage derived from it inherits the value.
# The list is spelled out explicitly to avoid issues with torch 2.2,
# see https://github.com/pytorch/pytorch/pull/123243
# Override at build time with `--build-arg torch_cuda_arch_list=...`.
ARG torch_cuda_arch_list="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
ENV TORCH_CUDA_ARCH_LIST="${torch_cuda_arch_list}"
Simon Mo's avatar
Simon Mo committed
42
43
#################### BASE BUILD IMAGE ####################

Stephen Krider's avatar
Stephen Krider committed
44

45
#################### WHEEL BUILD IMAGE ####################
Stephen Krider's avatar
Stephen Krider committed
46
47
# Wheel build stage: starts from `dev`, so it inherits its filesystem,
# WORKDIR and ENV settings.
FROM dev AS build

# install build dependencies
# pip's download cache is kept in a BuildKit cache mount rather than in the
# image layer.
COPY requirements-build.txt requirements-build.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install -r requirements-build.txt

53
54
55
# install compiler cache to speed up compilation leveraging local or remote caching
# `apt-get update` and `install` stay in one layer so the package index is
# never stale, and the index is removed again in that same layer so it does
# not add to the layer size.
RUN apt-get update -y \
    && apt-get install -y ccache \
    && rm -rf /var/lib/apt/lists/*

56
# files and directories related to build wheels
# Each path is copied explicitly (instead of `COPY . .`) so that only changes
# to these inputs invalidate the wheel build layers below.
COPY csrc csrc
COPY setup.py setup.py
COPY cmake cmake
COPY CMakeLists.txt CMakeLists.txt
# NOTE(review): these two files are also copied in the `dev` stage this stage
# derives from; the repeated copies look redundant -- confirm before removing.
COPY requirements-common.txt requirements-common.txt
COPY requirements-cuda.txt requirements-cuda.txt
COPY pyproject.toml pyproject.toml
COPY vllm vllm
Stephen Krider's avatar
Stephen Krider committed
65
66

# max jobs used by Ninja to build extensions
# (override with `--build-arg max_jobs=<n>`)
ARG max_jobs=2
ENV MAX_JOBS=${max_jobs}
# number of threads used by nvcc
# (override with `--build-arg nvcc_threads=<n>`)
ARG nvcc_threads=8
ENV NVCC_THREADS=${nvcc_threads}
# make sure punica kernels are built (for LoRA)
ENV VLLM_INSTALL_PUNICA_KERNELS=1
74

75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
ARG USE_SCCACHE
# If USE_SCCACHE is "1": download the pinned sccache v0.8.1 release, install
# the binary to /usr/bin/sccache, then build the wheel into ./dist, printing
# sccache statistics before and after the build. For any other value this step
# does nothing and the wheel is built by the ccache step that follows.
#  - `curl -f` makes an HTTP error fail the step right away instead of saving
#    the server's error page as sccache.tar.gz.
#  - no `sudo`: RUN steps in this stage already execute as root (the
#    `apt-get install` steps run without it).
#  - SCCACHE_BUCKET / SCCACHE_REGION are exported only inside this RUN, so
#    they are not persisted in the image environment.
# NOTE(review): the downloaded archive is not checksum-verified; consider
# pinning its sha256.
RUN --mount=type=cache,target=/root/.cache/pip \
    if [ "$USE_SCCACHE" = "1" ]; then \
        echo "Installing sccache..." \
        && curl -fL -o sccache.tar.gz https://github.com/mozilla/sccache/releases/download/v0.8.1/sccache-v0.8.1-x86_64-unknown-linux-musl.tar.gz \
        && tar -xzf sccache.tar.gz \
        && mv sccache-v0.8.1-x86_64-unknown-linux-musl/sccache /usr/bin/sccache \
        && rm -rf sccache.tar.gz sccache-v0.8.1-x86_64-unknown-linux-musl \
        && export SCCACHE_BUCKET=vllm-build-sccache \
        && export SCCACHE_REGION=us-west-2 \
        && sccache --show-stats \
        && python3 setup.py bdist_wheel --dist-dir=dist \
        && sccache --show-stats; \
    fi

91
92
# Default build path: runs only when USE_SCCACHE is not "1".
# CCACHE_DIR points at the BuildKit cache mount declared on the RUN below, so
# the ccache directory persists between builds without being stored in the
# image. (Presumably the build picks up the installed ccache -- that wiring is
# not visible in this file.)
ENV CCACHE_DIR=/root/.cache/ccache
RUN --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=cache,target=/root/.cache/pip \
    if [ "$USE_SCCACHE" != "1" ]; then \
        python3 setup.py bdist_wheel --dist-dir=dist; \
    fi
97

98
99
100
101
# Guard against oversized wheels: wheels larger than 100MB cannot be uploaded,
# so run the size check script against the freshly built `dist` directory.
COPY .buildkite/check-wheel-size.py ./
RUN python3 check-wheel-size.py dist

Simon Mo's avatar
Simon Mo committed
102
#################### WHEEL BUILD IMAGE ####################
Stephen Krider's avatar
Stephen Krider committed
103

104
105
#################### vLLM installation IMAGE ####################
# image with vLLM installed
# Starts from the CUDA `base` image rather than from the `dev`/`build` stages,
# so nothing installed in those stages is carried over.
FROM nvidia/cuda:12.4.1-base-ubuntu22.04 AS vllm-base
WORKDIR /vllm-workspace
108
109
110
111
112
113
114
115

# OS packages for the runtime image. The apt package lists are removed in the
# same layer that fetched them so they do not end up in the shipped image.
# NOTE(review): recommended packages are deliberately left enabled -- confirm
# no runtime dependency comes from them before adding
# `--no-install-recommends`.
RUN apt-get update -y \
    && apt-get install -y python3-pip git vim \
    && rm -rf /var/lib/apt/lists/*

# Workaround for https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
# this won't be needed for future versions of this docker image
# or future versions of triton.
# NOTE: the path is tied to the CUDA 12.4 base image chosen in this stage's
# FROM line; bump both together.
RUN ldconfig /usr/local/cuda-12.4/compat/
117
118
119
120
121
122

# Install the vLLM wheel first so that pip also installs torch etc.
# The wheel directory is bind-mounted from the `build` stage for this step
# only, so the wheel file itself is not copied into an image layer.
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
    pip install --verbose dist/*.whl
#################### vLLM installation IMAGE ####################
Stephen Krider's avatar
Stephen Krider committed
123
124


125
126
127
128
#################### TEST IMAGE ####################
# image to run unit testing suite
# note that this uses vllm installed by `pip`
FROM vllm-base AS test

# Copy the whole build context into the workspace. COPY is used rather than
# ADD because only a plain local copy is needed here.
COPY . /vllm-workspace/
Stephen Krider's avatar
Stephen Krider committed
131

132
# install development dependencies (for testing)
# requirements-dev.txt comes from the build context copied into
# /vllm-workspace in this stage.
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install -r requirements-dev.txt
135

136
137
138
139
140
141
# doc requires source code
# we hide them inside `test_docs/` , so that this source code
# will not be imported by other tests
# Done in a single RUN: creating the directory and moving `docs` and `vllm`
# into it is one logical step and needs only one layer.
RUN mkdir test_docs \
    && mv docs vllm test_docs/
Stephen Krider's avatar
Stephen Krider committed
142

143
#################### TEST IMAGE ####################
Stephen Krider's avatar
Stephen Krider committed
144

Simon Mo's avatar
Simon Mo committed
145
#################### OPENAI API SERVER ####################
Stephen Krider's avatar
Stephen Krider committed
146
147
# openai api server alternative
FROM vllm-base AS vllm-openai

# install additional dependencies for openai api server
# NOTE(review): these packages are unpinned, so the installed versions depend
# on when the image is built -- consider pinning them.
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install accelerate hf_transfer modelscope
Stephen Krider's avatar
Stephen Krider committed
152

yhu422's avatar
yhu422 committed
153
154
# Sets VLLM_USAGE_SOURCE in the container environment.
# Written in `key=value` form; the space-separated `ENV key value` form is
# deprecated.
ENV VLLM_USAGE_SOURCE=production-docker-image

# Exec-form entrypoint: the API server module is started directly by python3
# (no wrapping shell), and arguments given to `docker run` are appended to it.
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
Simon Mo's avatar
Simon Mo committed
156
#################### OPENAI API SERVER ####################