"vllm/v1/executor/ray_utils.py" did not exist on "5f036d2bcc5244ca431212167c94700e5ae7a8e0"
Dockerfile 8.83 KB
Newer Older
Simon Mo's avatar
Simon Mo committed
1
2
3
# The vLLM Dockerfile is used to construct vLLM image that can be directly used
# to run the OpenAI compatible server.

4
5
6
7
# Please reflect any changes made here in
# docs/source/dev/dockerfile/dockerfile.rst and
# docs/source/assets/dev/dockerfile-stages-dependency.png

8
# Default CUDA toolkit version. It is declared ahead of the first FROM so
# that it can be substituted into the base image tags used in this file.
ARG CUDA_VERSION=12.4.1

#################### BASE BUILD IMAGE ####################
# prepare basic build environment
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04 AS base

# An ARG declared before FROM is only visible to FROM lines, so CUDA_VERSION
# is declared again here to make it usable by the RUN steps of this stage.
ARG CUDA_VERSION=12.4.1
ARG PYTHON_VERSION=3.12

# Keep apt/debconf from prompting during the build.
ENV DEBIAN_FRONTEND=noninteractive

16
# Install Python and other dependencies.
# Steps, all in one layer:
#   1. preseed the tzdata debconf answers (America / Los_Angeles)
#   2. install the basic tooling, then add the deadsnakes PPA
#   3. install python${PYTHON_VERSION} and make it the `python3` alternative
#   4. bootstrap pip for that interpreter and print both versions as a check
RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
    && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
    && apt-get update -y \
    && apt-get install -y \
        ccache \
        software-properties-common \
        git \
        curl \
        sudo \
    && add-apt-repository ppa:deadsnakes/ppa \
    && apt-get update -y \
    && apt-get install -y \
        python${PYTHON_VERSION} \
        python${PYTHON_VERSION}-dev \
        python${PYTHON_VERSION}-venv \
    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
    && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \
    && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \
    && curl -sS https://bootstrap.pypa.io/get-pip.py | python${PYTHON_VERSION} \
    && python3 --version && python3 -m pip --version
Stephen Krider's avatar
Stephen Krider committed
29

30
31
32
33
34
35
36
37
# Upgrade to GCC 10 to avoid https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92519
# as it was causing spam when compiling the CUTLASS kernels.
# `apt-get update` runs in the same layer as the install so this step does not
# depend on a package index left behind by (or cached from) an earlier layer.
# gcc-10/g++-10 are then registered as the default gcc/g++ and the resulting
# compiler version is printed as a sanity check.
RUN apt-get update -y \
    && apt-get install -y gcc-10 g++-10 \
    && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 110 \
        --slave /usr/bin/g++ g++ /usr/bin/g++-10 \
    && gcc --version

38
39
40
41
# Workaround for https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
# this won't be needed for future versions of this docker image
# or future versions of triton.
# The directory name is built from the first two dot-separated fields of
# CUDA_VERSION (12.4.1 -> /usr/local/cuda-12.4/compat/).
RUN ldconfig "/usr/local/cuda-$(echo "${CUDA_VERSION}" | cut -d. -f1,2)/compat/"
43

Stephen Krider's avatar
Stephen Krider committed
44
45
46
WORKDIR /workspace

# install build and runtime dependencies
# Only the requirement manifests are copied here, so this layer is rebuilt
# only when one of them changes. pip is pointed at requirements-cuda.txt;
# requirements-common.txt is presumably pulled in from it -- verify.
COPY requirements-common.txt requirements-cuda.txt ./
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install --requirement requirements-cuda.txt
51

Mor Zusman's avatar
Mor Zusman committed
52

53
54
55
56
57
58
# cuda arch list used by torch
# can be useful for both `dev` and `test`
# explicitly set the list to avoid issues with torch 2.2
# see https://github.com/pytorch/pytorch/pull/123243
# (override with --build-arg torch_cuda_arch_list=...)
ARG torch_cuda_arch_list="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}

# Override the arch list for flash-attn to reduce the binary size
# (override with --build-arg vllm_fa_cmake_gpu_arches=...)
ARG vllm_fa_cmake_gpu_arches="80-real;90-real"
ENV VLLM_FA_CMAKE_GPU_ARCHES=${vllm_fa_cmake_gpu_arches}
Simon Mo's avatar
Simon Mo committed
62
63
#################### BASE BUILD IMAGE ####################

64
#################### WHEEL BUILD IMAGE ####################
65
66
FROM base AS build

# install build dependencies
# The manifest is copied on its own so that the pip layer below is reused
# as long as requirements-build.txt is unchanged.
COPY requirements-build.txt ./
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install --requirement requirements-build.txt

# Bring the whole build context into the working directory.
COPY . ./
74
75
76
# Optional repository check: tools/check_repo.sh is run only when
# GIT_REPO_CHECK is set to something other than 0 (the default skips it).
# The .git directory is bind-mounted for this step only.
ARG GIT_REPO_CHECK=0
RUN --mount=type=bind,source=.git,target=.git \
    [ "$GIT_REPO_CHECK" = 0 ] || bash tools/check_repo.sh
Stephen Krider's avatar
Stephen Krider committed
77
78

# max jobs used by Ninja to build extensions
ARG max_jobs=2
ENV MAX_JOBS=$max_jobs

# number of threads used by nvcc
ARG nvcc_threads=8
ENV NVCC_THREADS=${nvcc_threads}

# sccache settings: USE_SCCACHE has no default (the sccache path is taken
# only when it is set to 1); the remaining values are exported as the
# corresponding SCCACHE_* variables by the sccache build step.
ARG USE_SCCACHE
ARG SCCACHE_BUCKET_NAME=vllm-build-sccache
ARG SCCACHE_REGION_NAME=us-west-2
ARG SCCACHE_S3_NO_CREDENTIALS=0
89
90
# if USE_SCCACHE is set, use sccache to speed up compilation
# When USE_SCCACHE=1 this step downloads the pinned sccache v0.8.1 release,
# installs the binary into /usr/bin, exports the SCCACHE_* settings from the
# build args and builds the wheel into dist/, printing sccache statistics
# before and after. For any other value the step is a no-op.
# Notes:
#   - `curl -f` makes an HTTP error fail the download itself instead of
#     saving the error page as sccache.tar.gz.
#   - no `sudo`: the build already runs as root.
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,source=.git,target=.git \
    if [ "$USE_SCCACHE" = "1" ]; then \
        echo "Installing sccache..." \
        && curl -fL -o sccache.tar.gz https://github.com/mozilla/sccache/releases/download/v0.8.1/sccache-v0.8.1-x86_64-unknown-linux-musl.tar.gz \
        && tar -xzf sccache.tar.gz \
        && mv sccache-v0.8.1-x86_64-unknown-linux-musl/sccache /usr/bin/sccache \
        && rm -rf sccache.tar.gz sccache-v0.8.1-x86_64-unknown-linux-musl \
        && export SCCACHE_BUCKET=${SCCACHE_BUCKET_NAME} \
        && export SCCACHE_REGION=${SCCACHE_REGION_NAME} \
        && export SCCACHE_S3_NO_CREDENTIALS=${SCCACHE_S3_NO_CREDENTIALS} \
        && export SCCACHE_IDLE_TIMEOUT=0 \
        && export CMAKE_BUILD_TYPE=Release \
        && sccache --show-stats \
        && python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38 \
        && sccache --show-stats; \
    fi

108
109
# Wheel build for the non-sccache case. CCACHE_DIR points at a BuildKit
# cache mount, so whatever is written there is kept between builds.
# The step does nothing when USE_SCCACHE=1 (the wheel was already built by
# the sccache step).
ENV CCACHE_DIR=/root/.cache/ccache
RUN --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,source=.git,target=.git \
    [ "$USE_SCCACHE" = "1" ] \
    || python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38
115

116
# Check the size of the wheel if RUN_WHEEL_CHECK is true
COPY .buildkite/check-wheel-size.py check-wheel-size.py

# Default max size of the wheel is 250MB
ARG VLLM_MAX_SIZE_MB=250
ENV VLLM_MAX_SIZE_MB=${VLLM_MAX_SIZE_MB}

# Only the exact value "true" runs the check on dist/; any other value
# prints a notice and skips it.
ARG RUN_WHEEL_CHECK=true
RUN case "$RUN_WHEEL_CHECK" in \
        true) python3 check-wheel-size.py dist ;; \
        *) echo "Skipping wheel size check." ;; \
    esac
Simon Mo's avatar
Simon Mo committed
127
#################### WHEEL BUILD IMAGE ####################
Stephen Krider's avatar
Stephen Krider committed
128

129
130
131
132
133
134
135
136
137
138
#################### DEV IMAGE ####################
# Development image: the `base` stage plus the development requirements.
# `AS` is upper-case to match the casing of `FROM` and of every other stage
# declaration in this file.
FROM base AS dev

# requirements-dev.txt is the only file passed to pip; the lint and test
# manifests are copied next to it -- presumably because it includes them,
# verify against requirements-dev.txt.
COPY requirements-lint.txt requirements-lint.txt
COPY requirements-test.txt requirements-test.txt
COPY requirements-dev.txt requirements-dev.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install -r requirements-dev.txt

#################### DEV IMAGE ####################
139
140
#################### vLLM installation IMAGE ####################
# image with vLLM installed
141
FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu22.04 AS vllm-base

# Re-declared inside the stage so that RUN steps can read them.
ARG CUDA_VERSION=12.4.1
ARG PYTHON_VERSION=3.12

WORKDIR /vllm-workspace

# Keep apt/debconf from prompting during the build.
ENV DEBIAN_FRONTEND=noninteractive

# Store the Python version without dots (3.12 -> 312) in /etc/environment
# as an `export` line, so that a later step can source it.
RUN PYTHON_VERSION_STR=$(echo "${PYTHON_VERSION}" | tr -d '.') \
    && echo "export PYTHON_VERSION_STR=${PYTHON_VERSION_STR}" >> /etc/environment
149

150
# Install Python and other dependencies.
# Steps, all in one layer:
#   1. preseed the tzdata debconf answers (America / Los_Angeles)
#   2. install general tooling and the media/GL libraries
#   3. add the deadsnakes PPA and install python${PYTHON_VERSION}
#      (plus libibverbs-dev), making it the `python3` alternative
#   4. bootstrap pip for that interpreter and print both versions as a check
RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
    && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
    && apt-get update -y \
    && apt-get install -y \
        ccache \
        software-properties-common \
        git \
        curl \
        sudo \
        vim \
        python3-pip \
    && apt-get install -y \
        ffmpeg \
        libsm6 \
        libxext6 \
        libgl1 \
    && add-apt-repository ppa:deadsnakes/ppa \
    && apt-get update -y \
    && apt-get install -y \
        python${PYTHON_VERSION} \
        python${PYTHON_VERSION}-dev \
        python${PYTHON_VERSION}-venv \
        libibverbs-dev \
    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
    && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \
    && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \
    && curl -sS https://bootstrap.pypa.io/get-pip.py | python${PYTHON_VERSION} \
    && python3 --version && python3 -m pip --version
164
165
166
167
168

# Workaround for https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
# this won't be needed for future versions of this docker image
# or future versions of triton.
# The directory name is built from the first two dot-separated fields of
# CUDA_VERSION (12.4.1 -> /usr/local/cuda-12.4/compat/).
RUN ldconfig "/usr/local/cuda-$(echo "${CUDA_VERSION}" | cut -d. -f1,2)/compat/"
170
171
172
173

# install vllm wheel first, so that torch etc will be installed
# The wheel is not copied into the image: dist/ from the `build` stage is
# bind-mounted for the duration of this step only.
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,from=build,source=/workspace/dist,target=/vllm-workspace/dist \
    python3 -m pip install --verbose dist/*.whl

# Install the prebuilt FlashInfer wheel. /etc/environment is sourced first to
# obtain PYTHON_VERSION_STR, which selects the matching cpXY wheel tag.
RUN --mount=type=cache,target=/root/.cache/pip \
    . /etc/environment \
    && python3 -m pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.6/flashinfer-0.1.6+cu121torch2.4-cp${PYTHON_VERSION_STR}-cp${PYTHON_VERSION_STR}-linux_x86_64.whl

# Ship the examples directory inside the working directory of the image.
COPY examples examples
180
#################### vLLM installation IMAGE ####################
Stephen Krider's avatar
Stephen Krider committed
181
182


183
184
185
186
#################### TEST IMAGE ####################
# image to run unit testing suite
# note that this uses vllm installed by `pip`
FROM vllm-base AS test

# Copy the build context into the workspace. COPY is used rather than ADD
# because only plain local files are transferred here.
COPY . /vllm-workspace/

# install development dependencies (for testing)
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install -r requirements-dev.txt

# doc requires source code
# we hide them inside `test_docs/` , so that this source code
# will not be imported by other tests
# Done in a single layer so the moved directories are not stored again in
# additional layers.
RUN mkdir test_docs \
    && mv docs vllm test_docs/
Stephen Krider's avatar
Stephen Krider committed
200

201
#################### TEST IMAGE ####################
Stephen Krider's avatar
Stephen Krider committed
202

Simon Mo's avatar
Simon Mo committed
203
#################### OPENAI API SERVER ####################
Stephen Krider's avatar
Stephen Krider committed
204
205
# openai api server alternative
FROM vllm-base AS vllm-openai

# install additional dependencies for openai api server
# Every requirement that carries a comparison operator is quoted: unquoted,
# the shell treats `>` in `bitsandbytes>=0.44.0` as an output redirection
# (to a file named `=0.44.0`) and pip never sees the version constraint.
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.44.0' timm==0.9.10

# Sets VLLM_USAGE_SOURCE for containers started from this image
# (key=value form; the space-separated ENV form is deprecated).
ENV VLLM_USAGE_SOURCE=production-docker-image

# Exec form: the API server is started directly, and arguments given to
# `docker run` are appended to this command.
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
Simon Mo's avatar
Simon Mo committed
214
#################### OPENAI API SERVER ####################