"vscode:/vscode.git/clone" did not exist on "9e67c4ce985b0b8852603cfe3fcaf8f37de137ed"
Dockerfile 8.6 KB
Newer Older
Simon Mo's avatar
Simon Mo committed
1
2
3
# The vLLM Dockerfile is used to construct a vLLM image that can be used
# directly to run the OpenAI-compatible server.

4
5
6
7
# Please reflect any changes made here in
# docs/source/dev/dockerfile/dockerfile.rst and
# docs/source/assets/dev/dockerfile-stages-dependency.png

8
# Global build argument: selects the CUDA tag of the nvidia/cuda images named
# in the FROM lines of this file.
ARG CUDA_VERSION=12.4.1

#################### BASE BUILD IMAGE ####################
# prepare basic build environment
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04 AS base

# Version of the Python interpreter installed by the apt step of this stage.
ARG PYTHON_VERSION=3.12
# An ARG declared before FROM is only visible to FROM lines, so CUDA_VERSION
# is declared again here for the RUN steps of this stage.
ARG CUDA_VERSION=12.4.1

# Keep apt/debconf from waiting on interactive prompts during the build.
ENV DEBIAN_FRONTEND=noninteractive

16
# Install Python and other dependencies.
# - tzdata is preseeded through debconf so that its installation does not ask
#   for a region.
# - python${PYTHON_VERSION} is installed after the deadsnakes PPA is added and
#   is then registered as the default `python3`.
# - `add-apt-repository -y` never waits for a confirmation.
# - `curl -f` fails on an HTTP error instead of piping an error page into the
#   interpreter.
# - The apt package lists are removed in the same layer that downloaded them,
#   so they do not end up in the image.
RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
    && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
    && apt-get update -y \
    && apt-get install -y \
        ccache \
        curl \
        git \
        software-properties-common \
        sudo \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && apt-get update -y \
    && apt-get install -y \
        python${PYTHON_VERSION} \
        python${PYTHON_VERSION}-dev \
        python${PYTHON_VERSION}-venv \
    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
    && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \
    && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \
    && curl -fsS https://bootstrap.pypa.io/get-pip.py | python${PYTHON_VERSION} \
    && python3 --version && python3 -m pip --version \
    && rm -rf /var/lib/apt/lists/*
Stephen Krider's avatar
Stephen Krider committed
29

30
31
32
33
# Register the CUDA compat directory with the dynamic linker.
# Workaround for https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully this will not
# be needed for future versions of this docker image or of triton.
# The directory name carries only the major.minor part of CUDA_VERSION.
RUN CUDA_MAJOR_MINOR="$(echo $CUDA_VERSION | cut -d. -f1,2)" \
    && ldconfig /usr/local/cuda-${CUDA_MAJOR_MINOR}/compat/
35

Stephen Krider's avatar
Stephen Krider committed
36
37
38
WORKDIR /workspace

# Install build and runtime dependencies.
# Only the requirement manifests are copied, so this layer stays cached until
# one of them changes. requirements-common.txt is not passed to pip directly;
# presumably requirements-cuda.txt includes it -- confirm.
COPY requirements-common.txt requirements-cuda.txt ./
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install --requirement requirements-cuda.txt
43

Mor Zusman's avatar
Mor Zusman committed
44

45
46
47
48
49
50
# CUDA architecture list used by torch; can be useful for both `dev` and
# `test`. It is set explicitly to avoid issues with torch 2.2,
# see https://github.com/pytorch/pytorch/pull/123243
ARG torch_cuda_arch_list='7.0 7.5 8.0 8.6 8.9 9.0+PTX'
ENV TORCH_CUDA_ARCH_LIST=$torch_cuda_arch_list

# A narrower architecture list for flash-attn, to reduce the binary size.
ARG vllm_fa_cmake_gpu_arches='80-real;90-real'
ENV VLLM_FA_CMAKE_GPU_ARCHES=$vllm_fa_cmake_gpu_arches
Simon Mo's avatar
Simon Mo committed
54
55
#################### BASE BUILD IMAGE ####################

56
#################### WHEEL BUILD IMAGE ####################
# Stage that compiles the wheel; it starts from the `base` stage.
FROM base AS build

# Install the build-time Python dependencies. Only the manifest is copied
# here, so the layer is reused until requirements-build.txt changes.
COPY requirements-build.txt ./
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install --requirement requirements-build.txt
64

65
# Files and directories needed to build the wheel.
# Each directory gets its own COPY, because COPY places the *contents* of a
# source directory into the destination.
COPY cmake cmake
COPY csrc csrc
COPY vllm vllm
# Single files are copied together into the working directory.
COPY CMakeLists.txt pyproject.toml requirements-common.txt requirements-cuda.txt setup.py ./
Stephen Krider's avatar
Stephen Krider committed
74
75

# Build parallelism, overridable with --build-arg:
#   max_jobs     - max jobs used by Ninja to build extensions
#   nvcc_threads - number of threads used by nvcc
ARG max_jobs=2
ARG nvcc_threads=8
ENV MAX_JOBS=${max_jobs} \
    NVCC_THREADS=${nvcc_threads}
81

82
# sccache settings. USE_SCCACHE has no default; the sccache build below runs
# only when it is set to 1.
ARG USE_SCCACHE
ARG SCCACHE_BUCKET_NAME=vllm-build-sccache
ARG SCCACHE_REGION_NAME=us-west-2
ARG SCCACHE_S3_NO_CREDENTIALS=0

# If USE_SCCACHE is set, use sccache to speed up compilation.
# - .git is bind-mounted for this step only, so it is not stored in a layer.
# - `curl -f` fails on an HTTP error instead of saving an error page as the
#   archive.
# - No `sudo`: the earlier apt-get steps of this build already run without it,
#   so this step runs as root too.
# - The SCCACHE_* exports live only inside this RUN's shell.
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,source=.git,target=.git \
    if [ "$USE_SCCACHE" = "1" ]; then \
        echo "Installing sccache..." \
        && curl -fL -o sccache.tar.gz https://github.com/mozilla/sccache/releases/download/v0.8.1/sccache-v0.8.1-x86_64-unknown-linux-musl.tar.gz \
        && tar -xzf sccache.tar.gz \
        && mv sccache-v0.8.1-x86_64-unknown-linux-musl/sccache /usr/bin/sccache \
        && rm -rf sccache.tar.gz sccache-v0.8.1-x86_64-unknown-linux-musl \
        && export SCCACHE_BUCKET=${SCCACHE_BUCKET_NAME} \
        && export SCCACHE_REGION=${SCCACHE_REGION_NAME} \
        && export SCCACHE_S3_NO_CREDENTIALS=${SCCACHE_S3_NO_CREDENTIALS} \
        && export SCCACHE_IDLE_TIMEOUT=0 \
        && export CMAKE_BUILD_TYPE=Release \
        && sccache --show-stats \
        && python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38 \
        && sccache --show-stats; \
    fi

105
106
# Wheel build without sccache: skipped when USE_SCCACHE is 1, because the
# sccache step has then already produced the wheel. The ccache directory is a
# BuildKit cache mount, so it lives outside the image layers.
ENV CCACHE_DIR=/root/.cache/ccache
RUN --mount=type=bind,source=.git,target=.git \
    --mount=type=cache,target=/root/.cache/pip \
    --mount=type=cache,target=/root/.cache/ccache \
    [ "$USE_SCCACHE" = "1" ] \
    || python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38
112

113
# Wheel size check: runs check-wheel-size.py on dist/ unless RUN_WHEEL_CHECK
# is set to something other than "true".
COPY .buildkite/check-wheel-size.py check-wheel-size.py
# Default max size of the wheel is 250MB. The value is exported with ENV so
# that it is present in the environment of the check below.
ARG VLLM_MAX_SIZE_MB=250
ENV VLLM_MAX_SIZE_MB=${VLLM_MAX_SIZE_MB}
ARG RUN_WHEEL_CHECK=true
RUN if [ "$RUN_WHEEL_CHECK" != "true" ]; then \
        echo "Skipping wheel size check."; \
    else \
        python3 check-wheel-size.py dist; \
    fi
Simon Mo's avatar
Simon Mo committed
124
#################### WHEEL BUILD IMAGE ####################
Stephen Krider's avatar
Stephen Krider committed
125

126
127
128
129
130
131
132
133
134
135
#################### DEV IMAGE ####################
# Development image: the `base` stage plus the development requirements.
# `AS` is upper-case to match the FROM keyword and every other stage in this
# file.
FROM base AS dev

# Only requirements-dev.txt is passed to pip; the lint and test manifests are
# copied next to it (presumably it includes them -- confirm).
COPY requirements-lint.txt requirements-test.txt requirements-dev.txt ./
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install -r requirements-dev.txt

#################### DEV IMAGE ####################
136
137
#################### vLLM installation IMAGE ####################
# image with vLLM installed
FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu20.04 AS vllm-base

# Stage-local declarations: ARGs do not carry over from an earlier stage.
ARG PYTHON_VERSION=3.12
ARG CUDA_VERSION=12.4.1

WORKDIR /vllm-workspace
ENV DEBIAN_FRONTEND=noninteractive

# Record the Python version without its dot (3.12 -> 312) in /etc/environment
# as PYTHON_VERSION_STR; a later step of this stage sources that file to build
# the flashinfer wheel name.
RUN PY_DIGITS="$(echo ${PYTHON_VERSION} | sed 's/\.//g')" \
    && echo "export PYTHON_VERSION_STR=${PY_DIGITS}" >> /etc/environment
146

147
# Install Python and other dependencies.
# - tzdata is preseeded through debconf so that its installation does not ask
#   for a region.
# - The packages that do not depend on the PPA are installed in a single
#   apt-get call.
# - python${PYTHON_VERSION} is installed after the deadsnakes PPA is added and
#   is then registered as the default `python3`.
# - `add-apt-repository -y` never waits for a confirmation.
# - `curl -f` fails on an HTTP error instead of piping an error page into the
#   interpreter.
# - The apt package lists are removed in the same layer that downloaded them,
#   so they do not end up in the image.
RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
    && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
    && apt-get update -y \
    && apt-get install -y \
        ccache \
        curl \
        ffmpeg \
        git \
        libgl1 \
        libsm6 \
        libxext6 \
        python3-pip \
        software-properties-common \
        sudo \
        vim \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && apt-get update -y \
    && apt-get install -y \
        libibverbs-dev \
        python${PYTHON_VERSION} \
        python${PYTHON_VERSION}-dev \
        python${PYTHON_VERSION}-venv \
    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
    && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \
    && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \
    && curl -fsS https://bootstrap.pypa.io/get-pip.py | python${PYTHON_VERSION} \
    && python3 --version && python3 -m pip --version \
    && rm -rf /var/lib/apt/lists/*
161
162
163
164
165

# Register the CUDA compat directory with the dynamic linker.
# Workaround for https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully this will not
# be needed for future versions of this docker image or of triton.
# The directory name carries only the major.minor part of CUDA_VERSION.
RUN CUDA_MAJOR_MINOR="$(echo $CUDA_VERSION | cut -d. -f1,2)" \
    && ldconfig /usr/local/cuda-${CUDA_MAJOR_MINOR}/compat/
167
168
169
170

# Install the vllm wheel first, so that torch etc. get installed with it.
# dist/ from the `build` stage is bind-mounted for this step only, so the
# wheel file itself is not stored in an image layer.
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
    python3 -m pip install --verbose dist/*.whl

# Install the flashinfer wheel that matches the interpreter.
# PYTHON_VERSION_STR is read back from /etc/environment, where an earlier step
# of this stage stored it.
RUN --mount=type=cache,target=/root/.cache/pip \
    . /etc/environment \
    && PY_TAG="cp${PYTHON_VERSION_STR}" \
    && python3 -m pip install "https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.6/flashinfer-0.1.6+cu121torch2.4-${PY_TAG}-${PY_TAG}-linux_x86_64.whl"
176
#################### vLLM installation IMAGE ####################
Stephen Krider's avatar
Stephen Krider committed
177
178


179
180
181
182
#################### TEST IMAGE ####################
# image to run unit testing suite
# note that this uses vllm installed by `pip`
FROM vllm-base AS test

# COPY instead of ADD: this is a plain copy of the build context, and COPY
# has none of ADD's extra behaviours (archive extraction, URL download).
COPY . /vllm-workspace/

# install development dependencies (for testing)
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install -r requirements-dev.txt

# doc requires source code
# we hide them inside `test_docs/` , so that this source code
# will not be imported by other tests
# (done in a single layer instead of three)
RUN mkdir test_docs \
    && mv docs vllm test_docs/
Stephen Krider's avatar
Stephen Krider committed
196

197
#################### TEST IMAGE ####################
Stephen Krider's avatar
Stephen Krider committed
198

Simon Mo's avatar
Simon Mo committed
199
#################### OPENAI API SERVER ####################
# openai api server alternative
FROM vllm-base AS vllm-openai

# Install additional dependencies for the openai api server.
# `python3 -m pip` is used, as in the rest of this file, so that the packages
# go to the same interpreter that the ENTRYPOINT starts.
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install accelerate hf_transfer 'modelscope!=1.15.0'

# key=value form; the space-separated `ENV key value` form is legacy.
ENV VLLM_USAGE_SOURCE=production-docker-image

# Exec form: the server is started directly, without a wrapping shell, and
# arguments given to `docker run` are appended to it.
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
Simon Mo's avatar
Simon Mo committed
210
#################### OPENAI API SERVER ####################