Dockerfile.rocm 5.29 KB
Newer Older
1
2
3
4
# default base image
ARG REMOTE_VLLM="0"
ARG COMMON_WORKDIR=/app
ARG BASE_IMAGE=rocm/vllm-dev:base
5

6
FROM ${BASE_IMAGE} AS base
7

8
9
ARG ARG_PYTORCH_ROCM_ARCH
ENV PYTORCH_ROCM_ARCH=${ARG_PYTORCH_ROCM_ARCH:-${PYTORCH_ROCM_ARCH}}
10
11
ENV RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1
ENV RAY_EXPERIMENTAL_NOSET_HIP_VISIBLE_DEVICES=1
12
13

# Install some basic utilities
14
RUN apt-get update -q -y && apt-get install -q -y \
15
16
    sqlite3 libsqlite3-dev libfmt-dev libmsgpack-dev libsuitesparse-dev \
    apt-transport-https ca-certificates wget curl
17
# Remove sccache
18
RUN python3 -m pip install --upgrade pip
19
RUN apt-get purge -y sccache; python3 -m pip uninstall -y sccache; rm -f "$(which sccache)"
20
21

# Install UV
22
RUN curl -LsSf https://astral.sh/uv/install.sh | env UV_INSTALL_DIR="/usr/local/bin" sh
23
24
25
26
27
28
29
30

# This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
# Reference: https://github.com/astral-sh/uv/pull/1694
ENV UV_HTTP_TIMEOUT=500
ENV UV_INDEX_STRATEGY="unsafe-best-match"
# Use copy mode to avoid hardlink failures with Docker cache mounts
ENV UV_LINK_MODE=copy

31
32
33
34
35
36
37
38
39
40
41
42
43
ARG COMMON_WORKDIR
WORKDIR ${COMMON_WORKDIR}


# -----------------------
# vLLM fetch stages
FROM base AS fetch_vllm_0
ONBUILD COPY ./ vllm/
FROM base AS fetch_vllm_1
ARG VLLM_REPO="https://github.com/vllm-project/vllm.git"
ARG VLLM_BRANCH="main"
ONBUILD RUN git clone ${VLLM_REPO} \
	    && cd vllm \
44
	    && git fetch -v --prune -- origin ${VLLM_BRANCH} \
45
46
47
48
	    && git checkout FETCH_HEAD \
        && if [ ${VLLM_REPO} != "https://github.com/vllm-project/vllm.git" ] ; then \
               git remote add upstream "https://github.com/vllm-project/vllm.git" \
               && git fetch upstream ; fi
49
50
51
52
53
54
55
FROM fetch_vllm_${REMOTE_VLLM} AS fetch_vllm

# -----------------------
# vLLM build stages
FROM fetch_vllm AS build_vllm
# Build vLLM
RUN cd vllm \
56
    && python3 -m pip install -r requirements/rocm.txt \
57
58
59
60
61
    && python3 setup.py clean --all  \
    && python3 setup.py bdist_wheel --dist-dir=dist
FROM scratch AS export_vllm
ARG COMMON_WORKDIR
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/dist/*.whl /
62
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/requirements /requirements
63
64
65
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/benchmarks /benchmarks
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/tests /tests
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/examples /examples
66
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/docker/Dockerfile.rocm /docker/
67
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/.buildkite /.buildkite
68
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/vllm/v1 /vllm_v1
69
70
71
72
73
74
75

# -----------------------
# Test vLLM image
FROM base AS test

RUN python3 -m pip install --upgrade pip && rm -rf /var/lib/apt/lists/*

76
77
# Install vLLM using uv (inherited from base stage)
# Note: No -U flag to avoid upgrading PyTorch ROCm to CUDA version
78
RUN --mount=type=bind,from=export_vllm,src=/,target=/install \
79
    --mount=type=cache,target=/root/.cache/uv \
80
    cd /install \
81
82
    && uv pip install --system -r requirements/rocm.txt \
    && uv pip install --system -r requirements/rocm-test.txt \
83
    && pip uninstall -y vllm \
84
    && uv pip install --system *.whl
85
86
87
88

WORKDIR /vllm-workspace
ARG COMMON_WORKDIR
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm /vllm-workspace
89

90
91
92
# install development dependencies (for testing)
RUN cd /vllm-workspace \
    && python3 -m pip install -e tests/vllm_test_utils \
93
    && python3 -m pip install pytest-shard
94

95
96
97
98
99
# enable fast downloads from hf (for testing)
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --system hf_transfer
ENV HF_HUB_ENABLE_HF_TRANSFER=1

100
# Copy in the v1 package (for python-only install test group)
101
102
103
104
105
106
107
COPY --from=export_vllm /vllm_v1 /usr/local/lib/python${PYTHON_VERSION}/dist-packages/vllm/v1

# Source code is used in the `python_only_compile.sh` test
# We hide it inside `src/` so that this source code
# will not be imported by other tests
RUN mkdir src && mv vllm src/vllm

108
109
# -----------------------
# Final vLLM image
110
FROM base AS final
111

112
113
114
115
116
117
118
RUN python3 -m pip install --upgrade pip && rm -rf /var/lib/apt/lists/*
# Error related to odd state for numpy 1.20.3 where there is no METADATA etc, but an extra LICENSES_bundled.txt.
# Manually remove it so that later steps of numpy upgrade can continue
RUN case "$(which python3)" in \
        *"/opt/conda/envs/py_3.9"*) \
            rm -rf /opt/conda/envs/py_3.9/lib/python3.9/site-packages/numpy-1.20.3.dist-info/;; \
        *) ;; esac
119

120
121
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --system --upgrade huggingface-hub[cli]
122

123
124
# Install vLLM using uv (inherited from base stage)
# Note: No -U flag to avoid upgrading PyTorch ROCm to CUDA version
125
RUN --mount=type=bind,from=export_vllm,src=/,target=/install \
126
    --mount=type=cache,target=/root/.cache/uv \
127
    cd /install \
128
    && uv pip install --system -r requirements/rocm.txt \
129
    && pip uninstall -y vllm \
130
    && uv pip install --system *.whl
131
132
133
134
135
136

ARG COMMON_WORKDIR

# Copy over the benchmark scripts as well
COPY --from=export_vllm /benchmarks ${COMMON_WORKDIR}/vllm/benchmarks
COPY --from=export_vllm /examples ${COMMON_WORKDIR}/vllm/examples
137
COPY --from=export_vllm /docker ${COMMON_WORKDIR}/vllm/docker
138

139
ENV TOKENIZERS_PARALLELISM=false
140

141
142
143
# ENV that can improve safe tensor loading, and end-to-end time
ENV SAFETENSORS_FAST_GPU=1

144
145
# Performance environment variable.
ENV HIP_FORCE_DEV_KERNARG=1
youkaichao's avatar
youkaichao committed
146

147
CMD ["/bin/bash"]