# Dockerfile.rocm_base
#
# Global build arguments pinning the base image and the source revision of
# every component built below. Because they are declared before the first
# FROM, a stage must re-declare (without a value) each ARG it wants to read.
ARG BASE_IMAGE=rocm/dev-ubuntu-22.04:7.0-complete
ARG TRITON_BRANCH="57c693b6"
ARG TRITON_REPO="https://github.com/ROCm/triton.git"
ARG PYTORCH_BRANCH="89075173"
ARG PYTORCH_REPO="https://github.com/ROCm/pytorch.git"
ARG PYTORCH_VISION_BRANCH="v0.24.1"
ARG PYTORCH_VISION_REPO="https://github.com/pytorch/vision.git"
ARG PYTORCH_AUDIO_BRANCH="v2.9.0"
ARG PYTORCH_AUDIO_REPO="https://github.com/pytorch/audio.git"
ARG FA_BRANCH="0e60e394"
ARG FA_REPO="https://github.com/Dao-AILab/flash-attention.git"
ARG AITER_BRANCH="v0.1.10.post2"
ARG AITER_REPO="https://github.com/ROCm/aiter.git"
ARG MORI_BRANCH="2d02c6a9"
ARG MORI_REPO="https://github.com/ROCm/mori.git"

# Sccache configuration (only used in release pipeline)
# USE_SCCACHE is compared against the literal "1" by the RUN steps below.
# SCCACHE_DOWNLOAD_URL optionally overrides the default sccache release URL.
# NOTE(review): SCCACHE_ENDPOINT is declared but not referenced anywhere in
# this file — confirm whether a consumer outside this file relies on it.
ARG USE_SCCACHE
ARG SCCACHE_DOWNLOAD_URL
ARG SCCACHE_ENDPOINT
ARG SCCACHE_BUCKET_NAME=vllm-build-sccache
ARG SCCACHE_REGION_NAME=us-west-2
ARG SCCACHE_S3_NO_CREDENTIALS=0

# Base stage: common environment inherited by every build stage and by the
# final image.
FROM ${BASE_IMAGE} AS base

# ROCm's LLVM toolchain and ROCm binaries come before the system directories.
ENV PATH=/opt/rocm/llvm/bin:/opt/rocm/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
ENV ROCM_PATH=/opt/rocm
# NOTE(review): the trailing ':' leaves an empty entry in the search path,
# which the dynamic loader presumably resolves to the current directory —
# confirm this is intentional before removing or relying on it.
ENV LD_LIBRARY_PATH=/opt/rocm/lib:/usr/local/lib:

# GPU targets. PYTORCH_ROCM_ARCH can be overridden with --build-arg; the AITER
# and MORI lists are fixed here.
ARG PYTORCH_ROCM_ARCH=gfx90a;gfx942;gfx950;gfx1100;gfx1101;gfx1200;gfx1201;gfx1150;gfx1151
ENV PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}
ENV AITER_ROCM_ARCH=gfx942;gfx950
ENV MORI_GPU_ARCHS=gfx942;gfx950

# Required for RCCL in ROCm7.1
ENV HSA_NO_SCRATCH_RECLAIM=1

# Python interpreter version installed below; exported so later steps and
# downstream images can read it.
ARG PYTHON_VERSION=3.12
ENV PYTHON_VERSION=${PYTHON_VERSION}
# All build work happens under /app (WORKDIR creates the directory itself).
WORKDIR /app
ENV DEBIAN_FRONTEND=noninteractive

# Install Python and other dependencies
#
# - Adding the deadsnakes PPA is retried up to three times. The outcome is
#   tracked in ppa_added so that three consecutive failures abort the build
#   here instead of being masked by the exit status of the trailing `sleep`.
# - get-pip.py is downloaded to a file with `curl -f` rather than piped into
#   the interpreter, so an HTTP error or truncated download fails the step.
# - The apt package lists are removed in the same layer that created them.
RUN apt-get update -y \
    && apt-get install -y software-properties-common git curl sudo vim less libgfortran5 libopenmpi-dev libpci-dev \
    && ppa_added=0 \
    && for i in 1 2 3; do \
           if add-apt-repository -y ppa:deadsnakes/ppa; then ppa_added=1; break; fi; \
           echo "Attempt $i failed, retrying in 5s..."; sleep 5; \
       done \
    && { [ "$ppa_added" = "1" ] || { echo "ERROR: could not add ppa:deadsnakes/ppa after 3 attempts" >&2; exit 1; }; } \
    && apt-get update -y \
    && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv \
       python${PYTHON_VERSION}-lib2to3 python-is-python3 \
    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
    && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \
    && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \
    && curl -fsSL -o /tmp/get-pip.py https://bootstrap.pypa.io/get-pip.py \
    && python${PYTHON_VERSION} /tmp/get-pip.py \
    && rm -f /tmp/get-pip.py \
    && rm -rf /var/lib/apt/lists/* \
    && python3 --version && python3 -m pip --version

# Python-side build tooling available to every stage derived from `base`.
# cmake and setuptools are capped below their next major release.
RUN pip install -U packaging 'cmake<4' ninja wheel 'setuptools<80' pybind11 Cython
# JPEG and SoX development libraries (presumably for the vision/audio builds
# below — confirm). Package lists are dropped in the same layer.
RUN apt-get update \
    && apt-get install -y libjpeg-dev libsox-dev libsox-fmt-all sox \
    && rm -rf /var/lib/apt/lists/*
# Install sccache if USE_SCCACHE is enabled (for release builds)
#
# The global sccache ARGs are re-declared so this stage can read them.
# SCCACHE_DOWNLOAD_URL, when set, replaces the default GitHub release URL; the
# archive it points to must still unpack to the
# sccache-v0.8.1-x86_64-unknown-linux-musl/ directory, because the `mv` below
# uses that fixed path.
# `curl -f` makes an HTTP error fail the step directly instead of saving an
# error page that only trips `tar` afterwards.
ARG USE_SCCACHE
ARG SCCACHE_DOWNLOAD_URL
ARG SCCACHE_ENDPOINT
ARG SCCACHE_BUCKET_NAME
ARG SCCACHE_REGION_NAME
ARG SCCACHE_S3_NO_CREDENTIALS
RUN if [ "$USE_SCCACHE" = "1" ]; then \
        echo "Installing sccache..." \
        && SCCACHE_ARCH="x86_64" \
        && SCCACHE_VERSION="v0.8.1" \
        && SCCACHE_DL_URL="${SCCACHE_DOWNLOAD_URL:-https://github.com/mozilla/sccache/releases/download/${SCCACHE_VERSION}/sccache-${SCCACHE_VERSION}-${SCCACHE_ARCH}-unknown-linux-musl.tar.gz}" \
        && curl -fL -o /tmp/sccache.tar.gz "${SCCACHE_DL_URL}" \
        && tar -xzf /tmp/sccache.tar.gz -C /tmp \
        && mv /tmp/sccache-${SCCACHE_VERSION}-${SCCACHE_ARCH}-unknown-linux-musl/sccache /usr/bin/sccache \
        && chmod +x /usr/bin/sccache \
        && rm -rf /tmp/sccache.tar.gz /tmp/sccache-${SCCACHE_VERSION}-${SCCACHE_ARCH}-unknown-linux-musl \
        && sccache --version; \
    fi

# sccache wrappers for HIP compilation.
# When USE_SCCACHE=1, /opt/sccache-wrappers receives one small bash script per
# compiler (clang++, clang) that re-executes the real ROCm binary under
# sccache. The original ROCm binaries are left untouched, since modifying them
# can break detection.
# HIP_CLANG_PATH is deliberately NOT set as ENV so downstream images
# (Dockerfile.rocm) are unaffected; each build stage exports
# HIP_CLANG_PATH=/opt/sccache-wrappers itself when USE_SCCACHE=1.
RUN if [ "$USE_SCCACHE" = "1" ]; then \
        echo "Setting up sccache wrappers for HIP compilation..." \
        && mkdir -p /opt/sccache-wrappers \
        && for tool in clang++ clang; do \
               printf '#!/bin/bash\nexec sccache /opt/rocm/lib/llvm/bin/%s "$@"\n' "$tool" > "/opt/sccache-wrappers/$tool" \
               && chmod +x "/opt/sccache-wrappers/$tool" \
               || exit 1; \
           done \
        && echo "sccache wrappers created in /opt/sccache-wrappers"; \
    fi

# sccache runtime settings, derived from USE_SCCACHE.
# `${USE_SCCACHE:+value}` yields `value` whenever USE_SCCACHE is set to any
# non-empty string and the empty string otherwise, so with sccache disabled
# (USE_SCCACHE unset) these variables are defined but empty and no S3
# configuration leaks into the image.
ARG USE_SCCACHE
ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET_NAME}} \
    SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION_NAME}} \
    SCCACHE_S3_NO_CREDENTIALS=${USE_SCCACHE:+${SCCACHE_S3_NO_CREDENTIALS}} \
    SCCACHE_IDLE_TIMEOUT=${USE_SCCACHE:+0}

###
### Triton Build
###
# Builds the Triton wheel (and the triton_kernels wheel when that directory
# exists at the pinned revision) into /app/install.
FROM base AS build_triton
ARG TRITON_BRANCH
ARG TRITON_REPO
# Clone into a fixed directory so the `cd triton` below still works when
# TRITON_REPO is overridden with a repository whose name differs.
RUN git clone ${TRITON_REPO} triton
# setup.py lives either at the repository root or under python/, depending on
# the checked-out revision.
RUN cd triton \
    && git checkout ${TRITON_BRANCH} \
    && if [ ! -f setup.py ]; then cd python; fi \
    && python3 setup.py bdist_wheel --dist-dir=dist \
    && mkdir -p /app/install && cp dist/*.whl /app/install
RUN if [ -d triton/python/triton_kernels ]; then pip install build && cd triton/python/triton_kernels \
    && python3 -m build --wheel && cp dist/*.whl /app/install; fi
###
### AMD SMI Build
###
# Builds a wheel from the amd_smi sources under /opt/rocm/share/amd_smi and
# stages it in /app/install.
FROM base AS build_amdsmi
RUN cd /opt/rocm/share/amd_smi \
    && pip wheel . --wheel-dir=dist
RUN mkdir -p /app/install && cp /opt/rocm/share/amd_smi/dist/*.whl /app/install

###
### Pytorch build
###
# Builds torch, torchvision and torchaudio wheels in sequence; each wheel is
# installed right after it is built.
FROM base AS build_pytorch
ARG PYTORCH_BRANCH
ARG PYTORCH_VISION_BRANCH
ARG PYTORCH_AUDIO_BRANCH
ARG PYTORCH_REPO
ARG PYTORCH_VISION_REPO
ARG PYTORCH_AUDIO_REPO
ARG USE_SCCACHE

RUN git clone ${PYTORCH_REPO} pytorch
# The sccache exports happen inside the same shell as the build, so they apply
# to the setup.py invocation that follows but are not persisted in the image.
# Cache statistics are printed before and after the build.
RUN cd pytorch && git checkout ${PYTORCH_BRANCH} \
    && pip install -r requirements.txt && git submodule update --init --recursive \
    && python3 tools/amd_build/build_amd.py \
    && if [ "$USE_SCCACHE" = "1" ]; then \
           export HIP_CLANG_PATH=/opt/sccache-wrappers \
           && export CMAKE_C_COMPILER_LAUNCHER=sccache \
           && export CMAKE_CXX_COMPILER_LAUNCHER=sccache \
           && sccache --show-stats; \
       fi \
    && CMAKE_PREFIX_PATH=$(python3 -c 'import sys; print(sys.prefix)') python3 setup.py bdist_wheel --dist-dir=dist \
    && if [ "$USE_SCCACHE" = "1" ]; then sccache --show-stats; fi \
    && pip install dist/*.whl
# torchvision: built against the torch wheel installed by the previous step.
RUN git clone ${PYTORCH_VISION_REPO} vision
RUN cd vision && git checkout ${PYTORCH_VISION_BRANCH} \
    && if [ "$USE_SCCACHE" = "1" ]; then \
           export HIP_CLANG_PATH=/opt/sccache-wrappers \
           && export CMAKE_C_COMPILER_LAUNCHER=sccache \
           && export CMAKE_CXX_COMPILER_LAUNCHER=sccache; \
       fi \
    && python3 setup.py bdist_wheel --dist-dir=dist \
    && if [ "$USE_SCCACHE" = "1" ]; then sccache --show-stats; fi \
    && pip install dist/*.whl
# torchaudio: submodules and Python requirements are fetched before the build.
RUN git clone ${PYTORCH_AUDIO_REPO} audio
RUN cd audio && git checkout ${PYTORCH_AUDIO_BRANCH} \
    && git submodule update --init --recursive \
    && pip install -r requirements.txt \
    && if [ "$USE_SCCACHE" = "1" ]; then \
           export HIP_CLANG_PATH=/opt/sccache-wrappers \
           && export CMAKE_C_COMPILER_LAUNCHER=sccache \
           && export CMAKE_CXX_COMPILER_LAUNCHER=sccache; \
       fi \
    && python3 setup.py bdist_wheel --dist-dir=dist \
    && if [ "$USE_SCCACHE" = "1" ]; then sccache --show-stats; fi \
    && pip install dist/*.whl
# Gather the torch, torchvision and torchaudio wheels in /app/install, the
# directory other stages bind-mount from this stage.
RUN mkdir -p /app/install && cp /app/pytorch/dist/*.whl /app/install \
    && cp /app/vision/dist/*.whl /app/install \
    && cp /app/audio/dist/*.whl /app/install
###
### MORI Build
###
# Installs the wheels produced by build_pytorch, then builds the MORI wheel
# into /app/install.
FROM base AS build_mori
ARG MORI_BRANCH
ARG MORI_REPO
RUN --mount=type=bind,from=build_pytorch,src=/app/install/,target=/install \
    pip install /install/*.whl
# Clone into a fixed directory so the `cd mori` below still works when
# MORI_REPO is overridden with a repository whose name differs.
RUN git clone ${MORI_REPO} mori
# The trailing `ls` fails the step if no wheel was produced.
RUN cd mori \
    && git checkout ${MORI_BRANCH} \
    && git submodule update --init --recursive \
    && python3 setup.py bdist_wheel --dist-dir=dist && ls /app/mori/dist/*.whl
RUN mkdir -p /app/install && cp /app/mori/dist/*.whl /app/install


###
### FlashAttention Build
###
# Installs the wheels produced by build_pytorch, then builds the
# FlashAttention wheel into /app/install.
FROM base AS build_fa
ARG FA_BRANCH
ARG FA_REPO
ARG USE_SCCACHE
RUN --mount=type=bind,from=build_pytorch,src=/app/install/,target=/install \
    pip install /install/*.whl
# Clone into a fixed directory so the `cd flash-attention` below still works
# when FA_REPO is overridden with a repository whose name differs.
RUN git clone ${FA_REPO} flash-attention
# GPU_ARCHS is PYTORCH_ROCM_ARCH with every gfx1xxx entry (gfx1 followed by
# three digits) removed. A ';' is prepended before filtering and stripped
# afterwards so that such an entry is also dropped when it is first in the
# list; for lists that start with another arch the result is unchanged.
RUN cd flash-attention \
    && git checkout ${FA_BRANCH} \
    && git submodule update --init \
    && if [ "$USE_SCCACHE" = "1" ]; then \
           export HIP_CLANG_PATH=/opt/sccache-wrappers \
           && sccache --show-stats; \
       fi \
    && GPU_ARCHS=$(echo "${PYTORCH_ROCM_ARCH}" | sed -e 's/^/;/' -e 's/;gfx1[0-9]\{3\}//g' -e 's/^;//') python3 setup.py bdist_wheel --dist-dir=dist \
    && if [ "$USE_SCCACHE" = "1" ]; then sccache --show-stats; fi
RUN mkdir -p /app/install && cp /app/flash-attention/dist/*.whl /app/install
###
### AITER Build
###
# Installs the wheels produced by build_pytorch, then builds the AITER wheel
# for the architectures in AITER_ROCM_ARCH into /app/install.
FROM base AS build_aiter
ARG AITER_BRANCH
ARG AITER_REPO
ARG USE_SCCACHE
RUN --mount=type=bind,from=build_pytorch,src=/app/install/,target=/install \
    pip install /install/*.whl
# Clone into a fixed directory so the `cd aiter` below still works when
# AITER_REPO is overridden with a repository whose name differs.
RUN git clone --recursive ${AITER_REPO} aiter
# Submodules are re-synced after the checkout so they match the pinned
# revision rather than the default branch that was cloned.
RUN cd aiter \
    && git checkout ${AITER_BRANCH} \
    && git submodule update --init --recursive \
    && pip install -r requirements.txt
# The trailing `ls` fails the step if no wheel was produced.
RUN pip install pyyaml && cd aiter \
    && if [ "$USE_SCCACHE" = "1" ]; then \
           export HIP_CLANG_PATH=/opt/sccache-wrappers \
           && sccache --show-stats; \
       fi \
    && GPU_ARCHS=${AITER_ROCM_ARCH} python3 setup.py bdist_wheel --dist-dir=dist \
    && if [ "$USE_SCCACHE" = "1" ]; then sccache --show-stats; fi \
    && ls /app/aiter/dist/*.whl
RUN mkdir -p /app/install && cp /app/aiter/dist/*.whl /app/install

###
### Final Build
###

# Wheel release stage: only includes the dependencies used by the wheel
# release pipeline (Triton, FlashAttention, AMD SMI, PyTorch, AITER).
# Each build stage's /app/install is bind-mounted read-only and its wheels are
# copied into /app/debs.
FROM base AS debs_wheel_release
RUN mkdir /app/debs
RUN --mount=type=bind,from=build_triton,src=/app/install/,target=/install \
    cp /install/*.whl /app/debs
RUN --mount=type=bind,from=build_fa,src=/app/install/,target=/install \
    cp /install/*.whl /app/debs
RUN --mount=type=bind,from=build_amdsmi,src=/app/install/,target=/install \
    cp /install/*.whl /app/debs
RUN --mount=type=bind,from=build_pytorch,src=/app/install/,target=/install \
    cp /install/*.whl /app/debs
RUN --mount=type=bind,from=build_aiter,src=/app/install/,target=/install \
    cp /install/*.whl /app/debs

# Full debs stage - includes Mori (used by Docker releases)
# Starts from debs_wheel_release, whose /app/debs already holds the Triton,
# FlashAttention, AMD SMI, PyTorch and AITER wheels, and adds the MORI wheel
# on top, so the five shared copy steps are not repeated here.
FROM debs_wheel_release AS debs
RUN --mount=type=bind,from=build_mori,src=/app/install/,target=/install \
    cp /install/*.whl /app/debs
282

283
FROM base AS final
284
RUN --mount=type=bind,from=debs,src=/app/debs,target=/install \
285
    pip install /install/*.whl
286

287
288
289
290
291
292
293
ARG BASE_IMAGE
ARG TRITON_BRANCH
ARG TRITON_REPO
ARG PYTORCH_BRANCH
ARG PYTORCH_VISION_BRANCH
ARG PYTORCH_REPO
ARG PYTORCH_VISION_REPO
294
295
ARG PYTORCH_AUDIO_BRANCH
ARG PYTORCH_AUDIO_REPO
296
297
ARG FA_BRANCH
ARG FA_REPO
298
299
ARG AITER_BRANCH
ARG AITER_REPO
300
301
ARG MORI_BRANCH
ARG MORI_REPO
302
303
304
305
306
307
308
RUN echo "BASE_IMAGE: ${BASE_IMAGE}" > /app/versions.txt \
    && echo "TRITON_BRANCH: ${TRITON_BRANCH}" >> /app/versions.txt \
    && echo "TRITON_REPO: ${TRITON_REPO}" >> /app/versions.txt \
    && echo "PYTORCH_BRANCH: ${PYTORCH_BRANCH}" >> /app/versions.txt \
    && echo "PYTORCH_VISION_BRANCH: ${PYTORCH_VISION_BRANCH}" >> /app/versions.txt \
    && echo "PYTORCH_REPO: ${PYTORCH_REPO}" >> /app/versions.txt \
    && echo "PYTORCH_VISION_REPO: ${PYTORCH_VISION_REPO}" >> /app/versions.txt \
309
310
    && echo "PYTORCH_AUDIO_BRANCH: ${PYTORCH_AUDIO_BRANCH}" >> /app/versions.txt \
    && echo "PYTORCH_AUDIO_REPO: ${PYTORCH_AUDIO_REPO}" >> /app/versions.txt \
311
    && echo "FA_BRANCH: ${FA_BRANCH}" >> /app/versions.txt \
312
    && echo "FA_REPO: ${FA_REPO}" >> /app/versions.txt \
313
    && echo "AITER_BRANCH: ${AITER_BRANCH}" >> /app/versions.txt \
314
    && echo "AITER_REPO: ${AITER_REPO}" >> /app/versions.txt \
315
316
    && echo "MORI_BRANCH: ${MORI_BRANCH}" >> /app/versions.txt \
    && echo "MORI_REPO: ${MORI_REPO}" >> /app/versions.txt