Dockerfile.rocm 8.15 KB
Newer Older
1
2
3
4
# Global build arguments. They are declared before the first FROM, so a stage
# that needs one of them has to re-declare it with a bare ARG.

# Default base image that the `base` stage is built from.
ARG BASE_IMAGE=rocm/vllm-dev:base
# Working directory shared by the stages of this file.
ARG COMMON_WORKDIR=/app
# Suffix of the fetch stage to use: the value is appended to "fetch_vllm_"
# in a later FROM line.
ARG REMOTE_VLLM="0"
5

6
FROM ${BASE_IMAGE} AS base

# ARG_PYTORCH_ROCM_ARCH, when set to a non-empty value, replaces
# PYTORCH_ROCM_ARCH; otherwise the existing PYTORCH_ROCM_ARCH value is kept
# (presumably inherited from the base image - confirm).
ARG ARG_PYTORCH_ROCM_ARCH
ENV PYTORCH_ROCM_ARCH=${ARG_PYTORCH_ROCM_ARCH:-${PYTORCH_ROCM_ARCH}}

# Ray settings.
# NOTE(review): going by the names, these stop Ray from setting
# ROCR_VISIBLE_DEVICES / HIP_VISIBLE_DEVICES itself - confirm against Ray docs.
ENV RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1 \
    RAY_EXPERIMENTAL_NOSET_HIP_VISIBLE_DEVICES=1
12
13

# Install some basic utilities.
# The apt package lists are removed in the same layer that downloads them so
# they do not add to the image size. Any later step that needs apt again has
# to run `apt-get update` first.
RUN apt-get update -q -y && apt-get install -q -y \
    sqlite3 libsqlite3-dev libfmt-dev libmsgpack-dev libsuitesparse-dev \
    apt-transport-https ca-certificates wget curl \
    && rm -rf /var/lib/apt/lists/*

RUN python3 -m pip install --upgrade pip

# Remove sccache.
# The three commands are joined with `;` on purpose: each one is best-effort,
# so a missing apt package, pip package or binary does not fail the build.
RUN apt-get purge -y sccache; python3 -m pip uninstall -y sccache; rm -f "$(which sccache)"
20
21

# Install UV into /usr/local/bin.
# The installer is downloaded to a file before it is executed. With
# `curl ... | sh` the exit status of the pipeline is that of `sh`, so a failed
# download would be reported as success and uv would silently be missing.
RUN curl -LsSf https://astral.sh/uv/install.sh -o /tmp/uv-install.sh \
    && env UV_INSTALL_DIR="/usr/local/bin" sh /tmp/uv-install.sh \
    && rm -f /tmp/uv-install.sh
23
24
25
26
27
28
29
30

# uv configuration:
#   UV_HTTP_TIMEOUT   - timeout in seconds; needed because installing some
#                       dependencies via uv is likely to time out otherwise.
#                       Reference: https://github.com/astral-sh/uv/pull/1694
#   UV_INDEX_STRATEGY - index strategy passed to uv ("unsafe-best-match").
#   UV_LINK_MODE      - copy mode, to avoid hardlink failures with Docker
#                       cache mounts.
ENV UV_HTTP_TIMEOUT=500 \
    UV_INDEX_STRATEGY="unsafe-best-match" \
    UV_LINK_MODE=copy

# Re-declare the global COMMON_WORKDIR so it is visible in this stage, then
# make it the working directory.
ARG COMMON_WORKDIR
WORKDIR ${COMMON_WORKDIR}


# -----------------------
# vLLM fetch stages
# One of the two stages below is selected through REMOTE_VLLM by the last FROM
# of this section. Both fetch steps are ONBUILD instructions, so they run when
# the selected stage is used as the base of `fetch_vllm`.

# REMOTE_VLLM=0: copy the build context into vllm/.
FROM base AS fetch_vllm_0
ONBUILD COPY ./ vllm/

# REMOTE_VLLM=1: clone VLLM_REPO and check out VLLM_BRANCH in vllm/.
FROM base AS fetch_vllm_1
ARG VLLM_REPO="https://github.com/vllm-project/vllm.git"
ARG VLLM_BRANCH="main"
# Mirror the build arguments into ENV (presumably so that they are still
# defined when the ONBUILD trigger runs in the derived stage).
ENV VLLM_REPO=${VLLM_REPO}
ENV VLLM_BRANCH=${VLLM_BRANCH}
# The clone directory is given explicitly so that `cd vllm` also works for a
# repository that is not named "vllm". VLLM_REPO is quoted so that an empty
# value or one containing spaces cannot break the clone or the `[` test.
# When cloning from a fork, the upstream repository is added as an extra
# remote and fetched.
ONBUILD RUN git clone "${VLLM_REPO}" vllm \
    && cd vllm \
    && git fetch -v --prune -- origin ${VLLM_BRANCH} \
    && git checkout FETCH_HEAD \
    && if [ "${VLLM_REPO}" != "https://github.com/vllm-project/vllm.git" ] ; then \
           git remote add upstream "https://github.com/vllm-project/vllm.git" \
           && git fetch upstream ; fi

FROM fetch_vllm_${REMOTE_VLLM} AS fetch_vllm

# -----------------------
# vLLM build stages
FROM fetch_vllm AS build_vllm
# Build the vLLM wheel from the fetched source tree: install the ROCm
# requirements, clean previous build output, then write the wheel to vllm/dist.
RUN cd vllm && \
    python3 -m pip install -r requirements/rocm.txt && \
    python3 setup.py clean --all && \
    python3 setup.py bdist_wheel --dist-dir=dist
# Artifact stage: starts from scratch and holds only what is taken out of
# build_vllm, so other stages can copy or bind-mount from it.
FROM scratch AS export_vllm
ARG COMMON_WORKDIR
# Built wheel(s), placed at the root of the stage.
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/dist/*.whl /
# Files and directories taken from the source tree.
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/.buildkite /.buildkite
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/benchmarks /benchmarks
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/docker/Dockerfile.rocm /docker/
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/examples /examples
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/requirements /requirements
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/tests /tests
# The v1 sub-package is exported on its own as /vllm_v1.
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/vllm/v1 /vllm_v1
71

72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# -----------------------
# RIXL/UCX build stages
FROM base AS build_rixl
# Source repositories and the refs that are checked out after cloning.
ARG UCX_REPO="https://github.com/ROCm/ucx.git"
ARG UCX_BRANCH="da3fac2a"
ARG RIXL_REPO="https://github.com/ROCm/RIXL.git"
ARG RIXL_BRANCH="f33a5599"
# Locations used by the build steps of this stage.
ENV ROCM_PATH=/opt/rocm \
    UCX_HOME=/usr/local/ucx \
    RIXL_HOME=/usr/local/rixl \
    RIXL_BENCH_HOME=/usr/local/rixl_bench

# RIXL build system dependencies and RDMA support (one package per line,
# sorted). The apt lists are removed in the same layer.
RUN apt-get -y update && apt-get -y install \
    autoconf \
    ibverbs-providers \
    ibverbs-utils \
    libaio-dev \
    libcpprest-dev \
    libgrpc++-dev \
    libgrpc-dev \
    libibverbs-dev \
    libibverbs1 \
    libprotobuf-dev \
    librdmacm-dev \
    librdmacm1 \
    libtool \
    pkg-config \
    protobuf-compiler-grpc \
    rdmacm-utils \
    && rm -rf /var/lib/apt/lists/*

# Python packages installed into the system environment for the build steps
# of this stage.
RUN uv pip install --system meson auditwheel patchelf tomlkit

# Build UCX from source and install it under UCX_HOME.
# - The clone directory is named explicitly so the following `cd ucx` does not
#   depend on the repository name in UCX_REPO.
# - The install prefix and the ROCm location come from UCX_HOME / ROCM_PATH
#   (set earlier in this stage) instead of repeating the literal paths.
# - `make -j` without a number puts no limit on the number of parallel jobs,
#   which can exhaust memory on the build host; it is capped at the CPU count.
RUN cd /usr/local/src && \
    git clone ${UCX_REPO} ucx && \
    cd ucx && \
    git checkout ${UCX_BRANCH} && \
    ./autogen.sh && \
    mkdir build && cd build && \
    ../configure \
        --prefix=${UCX_HOME} \
        --enable-shared \
        --disable-static \
        --disable-doxygen-doc \
        --enable-optimizations \
        --enable-devel-headers \
        --with-rocm=${ROCM_PATH} \
        --with-verbs \
        --with-dm \
        --enable-mt && \
    make -j"$(nproc)" && \
    make install

# Put the installed UCX binaries and libraries on the search paths.
ENV PATH=${UCX_HOME}/bin:$PATH
ENV LD_LIBRARY_PATH=${UCX_HOME}/lib:${LD_LIBRARY_PATH}

# Clone RIXL into /opt/rixl, check out RIXL_BRANCH, then configure it with
# meson against the UCX and ROCm locations and build/install it with ninja
# under RIXL_HOME.
RUN git clone ${RIXL_REPO} /opt/rixl \
    && cd /opt/rixl \
    && git checkout ${RIXL_BRANCH} \
    && meson setup build --prefix=${RIXL_HOME} \
           -Ducx_path=${UCX_HOME} \
           -Drocm_path=${ROCM_PATH} \
    && cd build \
    && ninja \
    && ninja install

# Generate the RIXL wheel into /app/install using the repository's
# contrib/build-wheel.sh script.
RUN mkdir -p /app/install \
    && cd /opt/rixl \
    && ./contrib/build-wheel.sh \
           --output-dir /app/install \
           --rocm-dir ${ROCM_PATH} \
           --ucx-plugins-dir ${UCX_HOME}/lib/ucx \
           --nixl-plugins-dir ${RIXL_HOME}/lib/x86_64-linux-gnu/plugins

144
145
146
147
148
149
# -----------------------
# Test vLLM image
FROM base AS test

RUN python3 -m pip install --upgrade pip && rm -rf /var/lib/apt/lists/*

# Install vLLM using uv (uv itself comes from the base stage).
# The export_vllm stage is bind-mounted at /install, so the requirements files
# and the wheel are read from there without being copied into a layer.
# Note: no -U flag, to avoid upgrading PyTorch ROCm to the CUDA version.
RUN --mount=type=bind,from=export_vllm,src=/,target=/install \
    --mount=type=cache,target=/root/.cache/uv \
    cd /install && \
    uv pip install --system -r requirements/rocm.txt && \
    uv pip install --system -r requirements/rocm-test.txt && \
    pip uninstall -y vllm && \
    uv pip install --system *.whl
159

160
161
162
163
# Install the RIXL wheel from the build_rixl stage; its /app/install directory
# is bind-mounted at /rixl_install for this step.
RUN --mount=type=bind,from=build_rixl,src=/app/install,target=/rixl_install \
    uv pip install --system /rixl_install/*.whl

# Copy the vllm tree from build_vllm into /vllm-workspace and work from there.
ARG COMMON_WORKDIR
WORKDIR /vllm-workspace
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm /vllm-workspace
167

168
169
170
# Install development dependencies (for testing): the in-tree
# tests/vllm_test_utils package as an editable install, plus pytest-shard.
RUN cd /vllm-workspace && \
    python3 -m pip install -e tests/vllm_test_utils && \
    python3 -m pip install pytest-shard
172

173
174
175
176
177
# Enable fast downloads from hf (for testing): install hf_transfer and set
# the variable that turns it on.
RUN --mount=type=cache,target=/root/.cache/uv uv pip install --system hf_transfer
ENV HF_HUB_ENABLE_HF_TRANSFER=1

178
179
180
181
182
183
184
185
# Install the audio decode package `torchcodec` from source (required due to
# a ROCm and torch version mismatch) for tests that use the datasets package.
# The install script is copied in from the build context, run, and deleted
# again; the apt caches are cleaned in the same step.
COPY tools/install_torchcodec_rocm.sh /tmp/install_torchcodec.sh
RUN bash /tmp/install_torchcodec.sh && \
    rm /tmp/install_torchcodec.sh && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

186
# Copy in the v1 package (for the python-only install test group).
# NOTE(review): PYTHON_VERSION is not declared anywhere in this file;
# presumably it is an ENV provided by the base image - confirm, since an
# undefined value would leave the version out of the destination path.
COPY --from=export_vllm /vllm_v1 /usr/local/lib/python${PYTHON_VERSION}/dist-packages/vllm/v1

# The source code is used by the `python_only_compile.sh` test.
# It is moved under `src/` so that other tests do not import it.
RUN mkdir src \
    && mv vllm src/vllm

194
195
# -----------------------
# Final vLLM image
FROM base AS final

RUN python3 -m pip install --upgrade pip && rm -rf /var/lib/apt/lists/*

# numpy 1.20.3 can be left in an odd state in which there is no METADATA etc.
# but an extra LICENSES_bundled.txt. When python3 resolves to the py_3.9 conda
# environment, its dist-info directory is removed by hand so that later numpy
# upgrade steps can continue. For any other python3 location nothing is done.
RUN case "$(which python3)" in \
        *"/opt/conda/envs/py_3.9"*) \
            rm -rf /opt/conda/envs/py_3.9/lib/python3.9/site-packages/numpy-1.20.3.dist-info/ ;; \
    esac
205

206
207
# Install or upgrade huggingface-hub with its `cli` extra.
# The requirement is quoted because an unquoted `[cli]` is a shell glob
# pattern and could be expanded to a matching file name in the working
# directory instead of being passed to uv literally.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --system --upgrade "huggingface-hub[cli]"
208

209
210
# Install vLLM using uv (uv itself comes from the base stage).
# The export_vllm stage is bind-mounted at /install, so the requirements file
# and the wheel are read from there without being copied into a layer.
# Note: no -U flag, to avoid upgrading PyTorch ROCm to the CUDA version.
RUN --mount=type=bind,from=export_vllm,src=/,target=/install \
    --mount=type=cache,target=/root/.cache/uv \
    cd /install && \
    uv pip install --system -r requirements/rocm.txt && \
    pip uninstall -y vllm && \
    uv pip install --system *.whl
217
218

# Re-declare the global build arguments used by the rest of this stage.
ARG BASE_IMAGE
ARG COMMON_WORKDIR

# Copy over the benchmark scripts, the Dockerfile directory and the examples
# from the export stage.
COPY --from=export_vllm /benchmarks ${COMMON_WORKDIR}/vllm/benchmarks
COPY --from=export_vllm /docker ${COMMON_WORKDIR}/vllm/docker
COPY --from=export_vllm /examples ${COMMON_WORKDIR}/vllm/examples
225

226
# Runtime environment:
#   TOKENIZERS_PARALLELISM - set to false.
#   SAFETENSORS_FAST_GPU   - can improve safe tensor loading and end-to-end time.
#   HIP_FORCE_DEV_KERNARG  - performance environment variable.
ENV TOKENIZERS_PARALLELISM=false \
    SAFETENSORS_FAST_GPU=1 \
    HIP_FORCE_DEV_KERNARG=1

# Workaround for ROCm profiler limits: write a libkineto.conf that sets
# ROCTRACER_MAX_EVENTS and point KINETO_CONFIG at it. The same step also
# appends the base image name to versions.txt.
RUN echo "ROCTRACER_MAX_EVENTS=10000000" > ${COMMON_WORKDIR}/libkineto.conf \
    && echo "VLLM_BASE_IMAGE=${BASE_IMAGE}" >> ${COMMON_WORKDIR}/versions.txt
ENV KINETO_CONFIG="${COMMON_WORKDIR}/libkineto.conf"

CMD ["/bin/bash"]
240
241
242
243

# Set entrypoint for vllm-openai official images.
# The AS keyword is upper-case to match FROM; mixed casing triggers the
# FromAsCasing build-check warning.
FROM final AS vllm-openai
ENTRYPOINT ["vllm", "serve"]