Dockerfile.rocm 20.4 KB
Newer Older
1
2
3
4
# default base image
ARG REMOTE_VLLM="0"
ARG COMMON_WORKDIR=/app
ARG BASE_IMAGE=rocm/vllm-dev:base
5
6
7
8
9
# AMD NIC backend
ARG NIC_BACKEND=none
# AMD AINIC apt repo settings
ARG AINIC_VERSION=1.117.5
ARG UBUNTU_CODENAME=jammy
10

11
12
13
14
15
16
17
18
# Sccache configuration (only used in release pipeline)
ARG USE_SCCACHE
ARG SCCACHE_DOWNLOAD_URL
ARG SCCACHE_ENDPOINT
ARG SCCACHE_BUCKET_NAME=vllm-build-sccache
ARG SCCACHE_REGION_NAME=us-west-2
ARG SCCACHE_S3_NO_CREDENTIALS=0

19
FROM ${BASE_IMAGE} AS base
20

21
22
ARG ARG_PYTORCH_ROCM_ARCH
ENV PYTORCH_ROCM_ARCH=${ARG_PYTORCH_ROCM_ARCH:-${PYTORCH_ROCM_ARCH}}
23
24

# Install some basic utilities
25
# Base utilities needed by later build and runtime stages.
# `update` and `install` share one layer so a stale cached package index is
# never reused, and the index is deleted in that same layer so it does not end
# up in every image derived from this stage. Later stages that need apt run
# their own `apt-get update` first.
RUN apt-get update -q -y && apt-get install -q -y \
    sqlite3 libsqlite3-dev libfmt-dev libmsgpack-dev libsuitesparse-dev \
    apt-transport-https ca-certificates wget curl \
    libnuma-dev \
    && rm -rf /var/lib/apt/lists/*
29
RUN python3 -m pip install --upgrade pip
30
31
32
33
34
35
36
# Remove sccache only if not using sccache (it exists in base image from Dockerfile.rocm_base)
ARG USE_SCCACHE
# Unless the build opted in with USE_SCCACHE=1, strip every copy of sccache:
# the apt package, the pip package, and whatever binary is still on PATH.
# Each removal is best-effort, so this step never fails the build.
RUN [ "$USE_SCCACHE" = "1" ] || { \
        apt-get purge -y sccache || true; \
        python3 -m pip uninstall -y sccache || true; \
        rm -f "$(which sccache)" || true; \
    }
37

38
39
40
41
42
# Install UV — download first, then run, so a curl failure is not masked by the pipe
RUN curl -LsSf --retry 3 --retry-delay 5 https://astral.sh/uv/install.sh -o /tmp/uv-install.sh \
    && env UV_INSTALL_DIR="/usr/local/bin" sh /tmp/uv-install.sh \
    && rm -f /tmp/uv-install.sh \
    && uv --version
43
44
45
46
47
48
49
50

# This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
# Reference: https://github.com/astral-sh/uv/pull/1694
ENV UV_HTTP_TIMEOUT=500
ENV UV_INDEX_STRATEGY="unsafe-best-match"
# Use copy mode to avoid hardlink failures with Docker cache mounts
ENV UV_LINK_MODE=copy

51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# Install sccache if USE_SCCACHE is enabled (for release builds)
ARG USE_SCCACHE
ARG SCCACHE_DOWNLOAD_URL
ARG SCCACHE_ENDPOINT
ARG SCCACHE_BUCKET_NAME
ARG SCCACHE_REGION_NAME
ARG SCCACHE_S3_NO_CREDENTIALS
# Install sccache only when the build opts in with USE_SCCACHE=1.
# - If an sccache binary is already on PATH it is kept and only its version is
#   printed.
# - Otherwise the release tarball is downloaded (from SCCACHE_DOWNLOAD_URL when
#   provided, else from the GitHub release for the pinned version) and the
#   binary is installed to /usr/bin/sccache.
# The tarball must unpack to sccache-<version>-<arch>-unknown-linux-musl/,
# because the `mv` below relies on that directory name.
# curl uses --fail so an HTTP error aborts here instead of saving an error page
# that only surfaces later as a confusing tar failure; retries match the uv
# installer download earlier in this file.
RUN if [ "$USE_SCCACHE" = "1" ]; then \
        if command -v sccache >/dev/null 2>&1; then \
            echo "sccache already installed, skipping installation"; \
            sccache --version; \
        else \
            echo "Installing sccache..." \
            && SCCACHE_ARCH="x86_64" \
            && SCCACHE_VERSION="v0.8.1" \
            && SCCACHE_DL_URL="${SCCACHE_DOWNLOAD_URL:-https://github.com/mozilla/sccache/releases/download/${SCCACHE_VERSION}/sccache-${SCCACHE_VERSION}-${SCCACHE_ARCH}-unknown-linux-musl.tar.gz}" \
            && curl -fsSL --retry 3 --retry-delay 5 -o /tmp/sccache.tar.gz "${SCCACHE_DL_URL}" \
            && tar -xzf /tmp/sccache.tar.gz -C /tmp \
            && mv "/tmp/sccache-${SCCACHE_VERSION}-${SCCACHE_ARCH}-unknown-linux-musl/sccache" /usr/bin/sccache \
            && chmod +x /usr/bin/sccache \
            && rm -rf /tmp/sccache.tar.gz "/tmp/sccache-${SCCACHE_VERSION}-${SCCACHE_ARCH}-unknown-linux-musl" \
            && sccache --version; \
        fi; \
    fi

# Set sccache environment variables only when sccache is requested.
# Each value uses the `${USE_SCCACHE:+word}` expansion: it yields `word` when
# USE_SCCACHE is set and the empty string otherwise. The expansion does not
# compare against "1", so any value the builder treats as "set" populates them.
# NOTE(review): this presumably includes USE_SCCACHE=0 — confirm that is intended.
# When sccache is not requested the variables are still defined, just empty;
# that is what keeps the S3 config out of images built without sccache.
ARG USE_SCCACHE
ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET_NAME}}
ENV SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION_NAME}}
ENV SCCACHE_S3_NO_CREDENTIALS=${USE_SCCACHE:+${SCCACHE_S3_NO_CREDENTIALS}}
ENV SCCACHE_IDLE_TIMEOUT=${USE_SCCACHE:+0}

84
85
86
87
88
89
90
91
92
93
94
ARG COMMON_WORKDIR
WORKDIR ${COMMON_WORKDIR}


# -----------------------
# vLLM fetch stages
FROM base AS fetch_vllm_0
ONBUILD COPY ./ vllm/
FROM base AS fetch_vllm_1
ARG VLLM_REPO="https://github.com/vllm-project/vllm.git"
ARG VLLM_BRANCH="main"
95
96
ENV VLLM_REPO=${VLLM_REPO}
ENV VLLM_BRANCH=${VLLM_BRANCH}
97
98
# Runs in the stage that builds FROM this one: clone the requested repository,
# check out the requested branch, and for forks also fetch the canonical
# repository as the `upstream` remote.
# The clone target is named explicitly so the following `cd vllm` works even
# when the fork's URL does not end in "vllm". Variables are quoted so an empty
# or space-containing value cannot break the `[ ... ]` test or the git calls.
ONBUILD RUN git clone "${VLLM_REPO}" vllm \
    && cd vllm \
    && git fetch -v --prune -- origin "${VLLM_BRANCH}" \
    && git checkout FETCH_HEAD \
    && if [ "${VLLM_REPO}" != "https://github.com/vllm-project/vllm.git" ] ; then \
           git remote add upstream "https://github.com/vllm-project/vllm.git" \
           && git fetch upstream ; fi
104
105
106
107
108
FROM fetch_vllm_${REMOTE_VLLM} AS fetch_vllm

# -----------------------
# vLLM build stages
FROM fetch_vllm AS build_vllm
109
# Build vLLM (setup.py auto-detects sccache in PATH)
110
# Install the ROCm requirements, wipe any previous build output, then build the
# vLLM wheel into vllm/dist. `set -e` stops at the first failing command.
RUN set -e; \
    cd vllm; \
    python3 -m pip install -r requirements/rocm.txt; \
    python3 setup.py clean --all; \
    python3 setup.py bdist_wheel --dist-dir=dist
FROM scratch AS export_vllm
ARG COMMON_WORKDIR
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/dist/*.whl /
117
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/requirements /requirements
118
119
120
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/benchmarks /benchmarks
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/tests /tests
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/examples /examples
121
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/docker/Dockerfile.rocm /docker/
122
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/.buildkite /.buildkite
123
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/vllm/v1 /vllm_v1
124

125
126
# RIXL/UCX build stages
FROM base AS build_rixl
127
ARG RIXL_BRANCH="bf4a7214"
128
ARG RIXL_REPO="https://github.com/ROCm/RIXL.git"
129
130
ARG UCX_BRANCH="7009d7a1"
ARG UCX_REPO="https://github.com/openucx/ucx.git"
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
ENV ROCM_PATH=/opt/rocm
ENV UCX_HOME=/usr/local/ucx
ENV RIXL_HOME=/usr/local/rixl
ENV RIXL_BENCH_HOME=/usr/local/rixl_bench

# RIXL build system dependencies and RDMA support
RUN apt-get -y update && apt-get -y install autoconf libtool pkg-config \
    libgrpc-dev \
    libgrpc++-dev \
    libprotobuf-dev \
    protobuf-compiler-grpc \
    libcpprest-dev \
    libaio-dev \
    librdmacm1 \
    librdmacm-dev \
    libibverbs1 \
    libibverbs-dev \
    ibverbs-utils \
    rdmacm-utils \
    ibverbs-providers \
    && rm -rf /var/lib/apt/lists/*

RUN uv pip install --system meson auditwheel patchelf tomlkit

# Build UCX from source at the pinned UCX_BRANCH commit and install it under
# ${UCX_HOME}, with ROCm, verbs and device-memory support and multi-threading
# enabled.
# Build parallelism is capped at the number of CPUs: a bare `make -j` puts no
# limit on concurrent jobs and can exhaust memory on the build host.
RUN cd /usr/local/src && \
    git clone ${UCX_REPO} && \
    cd ucx && \
    git checkout ${UCX_BRANCH} && \
    ./autogen.sh && \
    mkdir build && cd build && \
    ../configure \
        --prefix=${UCX_HOME} \
        --enable-shared \
        --disable-static \
        --disable-doxygen-doc \
        --enable-optimizations \
        --enable-devel-headers \
        --with-rocm=${ROCM_PATH} \
        --with-verbs \
        --with-dm \
        --enable-mt && \
    make -j"$(nproc)" && \
    make install

ENV PATH=/usr/local/ucx/bin:$PATH
ENV LD_LIBRARY_PATH=${UCX_HOME}/lib:${LD_LIBRARY_PATH}

RUN git clone ${RIXL_REPO} /opt/rixl && \
    cd /opt/rixl && \
    git checkout ${RIXL_BRANCH} && \
    meson setup build --prefix=${RIXL_HOME} \
                     -Ducx_path=${UCX_HOME} \
                     -Drocm_path=${ROCM_PATH} && \
    cd build && \
    ninja && \
    ninja install

# Generate RIXL wheel
189
190
191
192
193
194
# Exclude libcore and libpull from auditwheel: transitive dependencies
# that are not shipped in the wheel and vary across base images.
RUN cd /opt/rixl && \
    sed -i "s/--exclude 'libamdhip64\*'/--exclude 'libamdhip64*' --exclude 'libcore*' --exclude 'libpull*'/" \
        contrib/build-wheel.sh && \
    mkdir -p /app/install && \
195
196
197
198
199
200
    ./contrib/build-wheel.sh \
        --output-dir /app/install \
        --rocm-dir ${ROCM_PATH} \
        --ucx-plugins-dir ${UCX_HOME}/lib/ucx \
        --nixl-plugins-dir ${RIXL_HOME}/lib/x86_64-linux-gnu/plugins

201
202
203
204
# DeepEP build stage
FROM base AS build_deep
ARG ROCSHMEM_BRANCH="ba0bf0f3"
ARG ROCSHMEM_REPO="https://github.com/ROCm/rocm-systems.git"
205
ARG DEEPEP_BRANCH="5d90af8b"
206
207
ARG DEEPEP_REPO="https://github.com/ROCm/DeepEP.git"
ARG DEEPEP_NIC="cx7"
208
ARG DEEPEP_ROCM_ARCH="gfx942;gfx950"
209
210
211
212
213
214
215
ENV ROCSHMEM_DIR=/opt/rocshmem

RUN git clone ${ROCSHMEM_REPO} \
 && cd rocm-systems \
 && git checkout ${ROCSHMEM_BRANCH} \
 && mkdir -p projects/rocshmem/build \
 && cd projects/rocshmem/build \
216
217
218
219
220
 && bash ../scripts/build_configs/all_backends \
      -DCMAKE_INSTALL_PREFIX="${ROCSHMEM_DIR}" \
      -DROCM_PATH=/opt/rocm \
      -DGPU_TARGETS="${DEEPEP_ROCM_ARCH}" \
      -DUSE_EXTERNAL_MPI=OFF
221
222
223
224
225
226
227

# Build DeepEP wheel.
# DeepEP looks for rocshmem at ROCSHMEM_DIR.
RUN git clone ${DEEPEP_REPO} \
 && cd DeepEP \
 && git checkout ${DEEPEP_BRANCH} \
 && python3 setup.py --variant rocm --nic ${DEEPEP_NIC} bdist_wheel --dist-dir=/app/deep_install
228

229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
# MoRI runtime dependencies live in Dockerfile.rocm so NIC backend changes do
# not force users to rebuild the long-lived Dockerfile.rocm_base image.
FROM base AS mori_base
ARG NIC_BACKEND
ARG AINIC_VERSION
ARG UBUNTU_CODENAME
# Install the MoRI proxy Python dependencies and, depending on NIC_BACKEND, the
# vendor NIC packages.
# The script runs under `bash -lc` with `set -euo pipefail`, so a failing
# command, an unset variable, or a failing pipeline stage aborts the build.
#   1. pip: blinker is installed with --ignore-installed, then quart, msgpack,
#      aiohttp and pyzmq.
#   2. NIC_BACKEND=none  -> no extra packages.
#      NIC_BACKEND=ainic -> registers the repo.radeon.com amdainic apt repo for
#                           ${AINIC_VERSION}/${UBUNTU_CODENAME} (signed with the
#                           dearmored rocm.gpg.key) and installs libionic-dev
#                           and ionic-common, cleaning the apt lists afterwards.
#      anything else     -> prints an error and exits with status 2.
# The `#` lines inside the continued instruction are Dockerfile comments and are
# removed before the shell sees the script.
RUN /bin/bash -lc 'set -euo pipefail; \
 echo "[MORI] Install MoRI proxy deps"; \
 pip install --quiet --ignore-installed blinker && \
 pip install --quiet quart msgpack aiohttp pyzmq; \
 echo "[MORI] NIC_BACKEND=${NIC_BACKEND}"; \
  \
  # NIC backend deps — mori auto-detects NIC at runtime (MORI_DEVICE_NIC env var override).
  # Only vendor packages are installed here for dlopen (e.g. libionic.so); no compile-time flags needed.
  case "${NIC_BACKEND}" in \
    # default: mlx5
    none) \
      ;; \
    # AMD NIC
    ainic) \
      apt-get update && apt-get install -y --no-install-recommends ca-certificates curl gnupg apt-transport-https && \
      rm -rf /var/lib/apt/lists/* && mkdir -p /etc/apt/keyrings; \
      curl -fsSL https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor > /etc/apt/keyrings/amdainic.gpg; \
      echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/amdainic.gpg] https://repo.radeon.com/amdainic/pensando/ubuntu/${AINIC_VERSION} ${UBUNTU_CODENAME} main" \
        > /etc/apt/sources.list.d/amdainic.list; \
      apt-get update && apt-get install -y --no-install-recommends \
          libionic-dev \
          ionic-common \
      ; \
      rm -rf /var/lib/apt/lists/*; \
      ;; \
    # TODO: Add Broadcom bnxt packages/repos here later.
    # bnxt) \
    #   echo "[MORI] Add Broadcom bnxt packages/repos here later."; \
    #   ;; \
    *) \
      echo "ERROR: unknown NIC_BACKEND=${NIC_BACKEND}. Use one of: none, ainic"; \
      exit 2; \
      ;; \
  esac;'

270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
# -----------------------
# vLLM wheel release build stage (for building distributable wheels)
# This stage pins dependencies to custom ROCm wheel versions and handles version detection
FROM fetch_vllm AS build_vllm_wheel_release

ARG COMMON_WORKDIR

# Create /install directory for custom wheels
RUN mkdir -p /install

# Copy custom ROCm wheels from docker/context if they exist
# COPY ensures Docker cache is invalidated when wheels change
# .keep file ensures directory always exists for COPY to work
COPY docker/context/base-wheels/ /tmp/base-wheels/
# This is how we know whether we are building for a wheel release.
# If no wheels are found there, this is not a wheel-release build, so the
# check below exits with an error and this stage is not built.
# Require pre-built ROCm wheels for a wheel-release build.
# When /tmp/base-wheels contains at least one *.whl, copy them to /install and
# list the result; otherwise fail the stage with exit status 1.
# The copy branch is a single `&&` chain so a failed `cp` fails the build
# instead of being masked by the exit status of the trailing `ls`.
RUN if [ -n "$(ls /tmp/base-wheels/*.whl 2>/dev/null)" ]; then \
        echo "Found custom wheels - copying to /install" \
        && cp /tmp/base-wheels/*.whl /install/ \
        && echo "Copied custom wheels:" \
        && ls -lh /install/; \
    else \
        echo "ERROR: No custom wheels found in docker/context/base-wheels/"; \
        echo "Wheel releases require pre-built ROCm wheels."; \
        exit 1; \
    fi

# GIT_REPO_CHECK: Verify repo is clean and tags are available (for release builds)
# This matches CUDA's Dockerfile behavior for proper version detection via setuptools_scm
ARG GIT_REPO_CHECK=0
RUN if [ "$GIT_REPO_CHECK" != "0" ]; then \
        echo "Running repository checks..."; \
        cd vllm && bash tools/check_repo.sh; \
    fi

# Extract version from git BEFORE any modifications (pin_rocm_dependencies.py modifies requirements/rocm.txt)
# This ensures setuptools_scm sees clean repo state for version detection
RUN --mount=type=bind,source=.git,target=vllm/.git \
    cd vllm \
310
    && pip install setuptools_scm regex \
311
312
313
314
    && VLLM_VERSION=$(python3 -c "import setuptools_scm; print(setuptools_scm.get_version())") \
    && echo "Detected vLLM version: ${VLLM_VERSION}" \
    && echo "${VLLM_VERSION}" > /tmp/vllm_version.txt

315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
# Fail if git-based package dependencies are found in requirements files
# (uv doesn't handle git+ URLs well, and packages should be distributed on PyPI)
# Extra notes: pip install is able to handle git+ URLs, but uv doesn't.
# Scan common.txt and rocm.txt for `git+` requirement URLs. The first file that
# contains one is reported (with the offending lines) and fails the build with
# exit status 1; otherwise a confirmation line is printed per file.
RUN echo "Checking for git-based packages in requirements files..." \
    && for req in common.txt rocm.txt; do \
         echo "Checking ${req} for git-based packages:"; \
         if grep -q 'git+' ${COMMON_WORKDIR}/vllm/requirements/${req}; then \
           echo "ERROR: Git-based packages found in ${req}:"; \
           grep 'git+' ${COMMON_WORKDIR}/vllm/requirements/${req}; \
           echo "Please publish these packages to PyPI instead of using git dependencies."; \
           exit 1; \
         fi; \
         echo "  ✓ No git-based packages found in ${req}"; \
       done \
    && echo "All requirements files are clean - no git-based packages found"

339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
# Pin vLLM dependencies to exact versions of custom ROCm wheels
# This ensures 'pip install vllm' automatically installs correct torch/triton/torchvision/amdsmi
COPY tools/vllm-rocm/pin_rocm_dependencies.py /tmp/pin_rocm_dependencies.py
RUN echo "Pinning vLLM dependencies to custom wheel versions..." \
    && python3 /tmp/pin_rocm_dependencies.py /install ${COMMON_WORKDIR}/vllm/requirements/rocm.txt

# Install dependencies using custom wheels from /install
RUN cd vllm \
    && echo "Building vLLM with custom wheels from /install" \
    && python3 -m pip install --find-links /install -r requirements/rocm.txt \
    && python3 setup.py clean --all

# Build wheel using pre-extracted version to avoid dirty state from modified requirements/rocm.txt
# (setup.py auto-detects sccache in PATH)
RUN --mount=type=bind,source=.git,target=vllm/.git \
    cd vllm \
    && export SETUPTOOLS_SCM_PRETEND_VERSION=$(cat /tmp/vllm_version.txt) \
    && echo "Building wheel with version: ${SETUPTOOLS_SCM_PRETEND_VERSION}" \
    && python3 setup.py bdist_wheel --dist-dir=dist

FROM scratch AS export_vllm_wheel_release
ARG COMMON_WORKDIR
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/dist/*.whl /
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/requirements /requirements
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/benchmarks /benchmarks
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/tests /tests
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/examples /examples
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/docker/Dockerfile.rocm /docker/
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/.buildkite /.buildkite
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/vllm/v1 /vllm_v1

370
371
# -----------------------
# Test vLLM image
372
FROM mori_base AS test
373
374
375

RUN python3 -m pip install --upgrade pip && rm -rf /var/lib/apt/lists/*

376
377
# Install vLLM using uv (inherited from base stage)
# Note: No -U flag to avoid upgrading PyTorch ROCm to CUDA version
378
RUN --mount=type=bind,from=export_vllm,src=/,target=/install \
379
    --mount=type=cache,target=/root/.cache/uv \
380
    cd /install \
381
    && uv pip install --system -r requirements/rocm.txt \
382
    && uv pip install --system -r requirements/test/rocm.txt \
383
    && pip uninstall -y vllm \
384
    && uv pip install --system *.whl
385

386
387
388
389
# Persist the built wheel in the image so python_only_compile_rocm.sh can
# reinstall it after removing compilers. The bind-mounted /install contents
# above are not available once that RUN step completes.
COPY --from=export_vllm /*.whl /opt/vllm-wheels/
390

391
392
393
394
# Install RIXL wheel
RUN --mount=type=bind,from=build_rixl,src=/app/install,target=/rixl_install \
    uv pip install --system /rixl_install/*.whl

395
396
397
398
399
# Install DeepEP wheel
RUN --mount=type=bind,from=build_deep,src=/app/deep_install,target=/deep_install \
    uv pip install --system /deep_install/*.whl
COPY --from=build_deep /opt/rocshmem /opt/rocshmem

400
401
402
403
404
405
406
407
# RIXL/MoRIIO runtime dependencies (RDMA userspace libraries)
RUN apt-get update -q -y && apt-get install -q -y \
    librdmacm1 \
    libibverbs1 \
    ibverbs-providers \
    ibverbs-utils \
    && rm -rf /var/lib/apt/lists/*

408
409
410
WORKDIR /vllm-workspace
ARG COMMON_WORKDIR
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm /vllm-workspace
411

412
413
414
# install development dependencies (for testing)
RUN cd /vllm-workspace \
    && python3 -m pip install -e tests/vllm_test_utils \
415
    && python3 -m pip install pytest-shard
416

417
# enable fast downloads from hf (for testing)
418
419
420
421
ENV HF_XET_HIGH_PERFORMANCE=1

# Increase the timeout for Hugging Face downloads (for testing).
# Uses the `key=value` form; the space-separated `ENV key value` form is legacy.
ENV HF_HUB_DOWNLOAD_TIMEOUT=60
422

423
424
425
426
427
428
429
430
# install audio decode package `torchcodec` from source (required due to 
# ROCm and torch version mismatch) for tests with datasets package
COPY tools/install_torchcodec_rocm.sh /tmp/install_torchcodec.sh
RUN bash /tmp/install_torchcodec.sh \
    && rm /tmp/install_torchcodec.sh \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

431
# Copy in the v1 package (for python-only install test group)
432
433
COPY --from=export_vllm /vllm_v1 /usr/local/lib/python${PYTHON_VERSION}/dist-packages/vllm/v1

434
435
436
437
438
# Set MIOPEN ENVS to resolve performance regressions in MIOpen 3D convolution kernel
# See: https://github.com/pytorch/pytorch/issues/169857
ENV MIOPEN_DEBUG_CONV_DIRECT=0
ENV MIOPEN_DEBUG_CONV_GEMM=0

439
440
441
442
# Use legacy IPC mode for HSA to avoid GPU memory pinning issues with UCX rocm_ipc
# See: https://github.com/ROCm/rocm-libraries/issues/6266
ENV HSA_ENABLE_IPC_MODE_LEGACY=1

443
444
445
446
447
# Source code is used in the `python_only_compile.sh` test
# We hide it inside `src/` so that this source code
# will not be imported by other tests
RUN mkdir src && mv vllm src/vllm

448
# This is a workaround to ensure pytest exits with the correct status code in CI tests.
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
# Generate /vllm-workspace/conftest.py, one printf argument per output line.
# The generated hooks do the following:
#   - pytest_sessionfinish stores pytest's exit status in the module-level
#     _exit_code (which starts at 1, so it stays 1 if that hook never runs).
#   - pytest_unconfigure flushes stdout/stderr and then calls os._exit() with
#     the stored status, ending the process immediately with that code.
RUN printf '%s\n' \
    'import os' \
    '' \
    '_exit_code = 1' \
    '' \
    'def pytest_sessionfinish(session, exitstatus):' \
    '    global _exit_code' \
    '    _exit_code = int(exitstatus)' \
    '' \
    'def pytest_unconfigure(config):' \
    '    import sys' \
    '    sys.stdout.flush()' \
    '    sys.stderr.flush()' \
    '    os._exit(_exit_code)' \
    > /vllm-workspace/conftest.py
464

465
466
# -----------------------
# Final vLLM image
467
FROM mori_base AS final
468

469
RUN python3 -m pip install --upgrade pip && rm -rf /var/lib/apt/lists/*
470
471
472
473
474
475
476
477
478
479
480
481
482

# Clean up sccache from release image (not needed at runtime)
# This removes the binary and wrappers that may have been installed during build
RUN rm -f /usr/bin/sccache || true \
    && rm -rf /opt/sccache-wrappers || true

# Unset sccache environment variables for the release image
# This prevents S3 bucket config from leaking into production images
ENV SCCACHE_BUCKET=
ENV SCCACHE_REGION=
ENV SCCACHE_S3_NO_CREDENTIALS=
ENV SCCACHE_IDLE_TIMEOUT=

483
484
485
486
487
488
# Error related to odd state for numpy 1.20.3 where there is no METADATA etc, but an extra LICENSES_bundled.txt.
# Manually remove it so that later steps of numpy upgrade can continue
RUN case "$(which python3)" in \
        *"/opt/conda/envs/py_3.9"*) \
            rm -rf /opt/conda/envs/py_3.9/lib/python3.9/site-packages/numpy-1.20.3.dist-info/;; \
        *) ;; esac
489

490
491
# Upgrade huggingface-hub with its CLI extra, reusing the uv download cache.
# The requirement is quoted because `[cli]` is a shell glob pattern; unquoted,
# it would be rewritten if a matching file name existed in the working directory.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --system --upgrade "huggingface-hub[cli]"
492

493
494
# Install vLLM using uv (inherited from base stage)
# Note: No -U flag to avoid upgrading PyTorch ROCm to CUDA version
495
RUN --mount=type=bind,from=export_vllm,src=/,target=/install \
496
    --mount=type=cache,target=/root/.cache/uv \
497
    cd /install \
498
    && uv pip install --system -r requirements/rocm.txt \
499
    && pip uninstall -y vllm \
500
    && uv pip install --system *.whl
501
502

ARG COMMON_WORKDIR
503
ARG BASE_IMAGE
504
505
ARG NIC_BACKEND
ARG AINIC_VERSION
506
507
508
509

# Copy over the benchmark scripts as well
COPY --from=export_vllm /benchmarks ${COMMON_WORKDIR}/vllm/benchmarks
COPY --from=export_vllm /examples ${COMMON_WORKDIR}/vllm/examples
510
COPY --from=export_vllm /docker ${COMMON_WORKDIR}/vllm/docker
511

512
ENV TOKENIZERS_PARALLELISM=false
513

514
515
516
# ENV that can improve safe tensor loading, and end-to-end time
ENV SAFETENSORS_FAST_GPU=1

517
518
# Performance environment variable.
ENV HIP_FORCE_DEV_KERNARG=1
youkaichao's avatar
youkaichao committed
519

520
521
522
# Workaround for ROCm profiler limits
RUN echo "ROCTRACER_MAX_EVENTS=10000000" > ${COMMON_WORKDIR}/libkineto.conf
ENV KINETO_CONFIG="${COMMON_WORKDIR}/libkineto.conf"
523
524
525
# Append the base image and NIC build settings to versions.txt in one grouped
# write.
RUN { \
        echo "VLLM_BASE_IMAGE=${BASE_IMAGE}"; \
        echo "MORI_NIC_BACKEND=${NIC_BACKEND}"; \
        echo "AINIC_VERSION=${AINIC_VERSION}"; \
    } >> ${COMMON_WORKDIR}/versions.txt
526

527
CMD ["/bin/bash"]
528
529

# Set entrypoint for vllm-openai official images
530
FROM final AS vllm-openai
531
ENTRYPOINT ["vllm", "serve"]