"vscode:/vscode.git/clone" did not exist on "708d897e96f36c3e2cc71565fab4e22d1cbcca70"
# Dockerfile.rocm
# Multi-stage build of vLLM for AMD ROCm GPUs.
# Global build arguments.
# These are declared before the first FROM, so a build stage has to redeclare
# an argument (a bare `ARG NAME`) before it can read the value.

# Default ROCm 6.2 base image
ARG BASE_IMAGE="rocm/pytorch:rocm6.2_ubuntu20.04_py3.9_pytorch_release_2.3.0"

# Default ROCm ARCHes to build vLLM for (semicolon-separated list).
ARG PYTORCH_ROCM_ARCH="gfx908;gfx90a;gfx942;gfx1100"

# Whether to install CK-based flash-attention
# If 0, will not install flash-attention
ARG BUILD_FA="1"
# GPU architectures passed to the flash-attention build as GPU_ARCHS
ARG FA_GFX_ARCHS="gfx90a;gfx942"
# Git ref of ROCm/flash-attention to check out
ARG FA_BRANCH="3cea2fb"

# Whether to build triton on rocm
ARG BUILD_TRITON="1"
# Git ref of triton to check out
ARG TRITON_BRANCH="e192dba"

### Base image build stage
# Common foundation for every later stage: OS utilities, pip, the ROCm torch
# nightly, and the environment variables used by the builds.
FROM $BASE_IMAGE AS base

# Import arg(s) defined before this build stage
ARG PYTORCH_ROCM_ARCH

# Install some basic utilities.
# update + install + list cleanup happen in one layer so that no layer keeps
# a stale apt package index.
RUN apt-get update && apt-get install -y \
    python3 \
    python3-pip \
    curl \
    ca-certificates \
    sudo \
    git \
    bzip2 \
    libx11-6 \
    build-essential \
    wget \
    unzip \
    tmux \
    ccache \
 && rm -rf /var/lib/apt/lists/*

# When launching the container, mount the code directory to /vllm-workspace
ARG APP_MOUNT=/vllm-workspace
WORKDIR ${APP_MOUNT}

RUN python3 -m pip install --upgrade pip

# Remove sccache so it doesn't interfere with ccache
# TODO: implement sccache support across components
# The three removals are joined with ';' (not '&&') so each one is attempted
# even if an earlier one fails.
RUN apt-get purge -y sccache; python3 -m pip uninstall -y sccache; rm -f "$(which sccache)"

# Install torch == 2.6.0 on ROCm
# The ROCm version is detected from the rocm-X.Y directory name under /opt;
# only ROCm 6.2 triggers the reinstall, any other version keeps the torch
# that is already present.
RUN --mount=type=cache,target=/root/.cache/pip \
    case "$(ls /opt | grep -Po 'rocm-[0-9]\.[0-9]')" in \
        *"rocm-6.2"*) \
            python3 -m pip uninstall -y torch torchvision \
            && python3 -m pip install --pre \
                torch==2.6.0.dev20241113+rocm6.2 \
                'setuptools-scm>=8' \
                torchvision==0.20.0.dev20241113+rocm6.2 \
                --extra-index-url https://download.pytorch.org/whl/nightly/rocm6.2;; \
        *) ;; esac

ENV LLVM_SYMBOLIZER_PATH=/opt/rocm/llvm/bin/llvm-symbolizer
# No trailing ':' on the search paths below: an empty element is interpreted
# as the current working directory. `${VAR:+${VAR}:}` prepends the previous
# value only when it is set, so an unset variable does not produce a leading
# empty element either.
ENV PATH=$PATH:/opt/rocm/bin:/libtorch/bin
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/opt/rocm/lib/:/libtorch/lib
ENV CPLUS_INCLUDE_PATH=${CPLUS_INCLUDE_PATH:+${CPLUS_INCLUDE_PATH}:}/libtorch/include:/libtorch/include/torch/csrc/api/include/:/opt/rocm/include/

# Persist the target GPU architectures into the image environment
ENV PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}
# Also used as the cache-mount target by the wheel build stages
ENV CCACHE_DIR=/root/.cache/ccache


### AMD-SMI build stage
# Produces the amdsmi wheel in /install; the final stage bind-mounts that
# directory to pick the wheel up.
FROM base AS build_amdsmi
# Build amdsmi wheel always
RUN cd /opt/rocm/share/amd_smi \
    && python3 -m pip wheel . --wheel-dir=/install


### Flash-Attention wheel build stage
FROM base AS build_fa
# Import arg(s) defined before this build stage
ARG BUILD_FA
ARG FA_GFX_ARCHS
ARG FA_BRANCH
# Build ROCm flash-attention wheel if `BUILD_FA = 1`.
# Otherwise create an empty /install directory, as later build stages expect
# one to exist.
RUN --mount=type=cache,target=${CCACHE_DIR} \
    if [ "$BUILD_FA" = "1" ]; then \
        mkdir -p libs \
        && cd libs \
        && git clone https://github.com/ROCm/flash-attention.git \
        && cd flash-attention \
        && git checkout "${FA_BRANCH}" \
        && git submodule update --init \
        && GPU_ARCHS="${FA_GFX_ARCHS}" python3 setup.py bdist_wheel --dist-dir=/install; \
    else \
        mkdir -p /install; \
    fi

### Triton wheel build stage
FROM base AS build_triton
# Import arg(s) defined before this build stage
ARG BUILD_TRITON
ARG TRITON_BRANCH
# Build triton wheel if `BUILD_TRITON = 1`.
# Otherwise create an empty /install directory, as later build stages expect
# one to exist.
RUN --mount=type=cache,target=${CCACHE_DIR} \
    if [ "$BUILD_TRITON" = "1" ]; then \
        mkdir -p libs \
        && cd libs \
        && python3 -m pip install ninja cmake wheel pybind11 \
        && git clone https://github.com/OpenAI/triton.git \
        && cd triton \
        && git checkout "${TRITON_BRANCH}" \
        && cd python \
        && python3 setup.py bdist_wheel --dist-dir=/install; \
    else \
        mkdir -p /install; \
    fi


### Final vLLM build stage
FROM base AS final
# Import the vLLM development directory from the build context
COPY . .

# Optional repository check: set GIT_REPO_CHECK to anything other than 0 to
# run tools/check_repo.sh with the .git directory bind-mounted.
ARG GIT_REPO_CHECK=0
RUN --mount=type=bind,source=.git,target=.git \
    if [ "$GIT_REPO_CHECK" != 0 ]; then bash tools/check_repo.sh ; fi

RUN python3 -m pip install --upgrade pip

# Package upgrades for useful functionality or to avoid dependency issues
# (the extras requirement is quoted so the shell cannot treat [cli] as a glob)
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install --upgrade numba scipy 'huggingface-hub[cli]' pytest-shard

# Workaround for ray >= 2.10.0
ENV RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1
# Silences the HF Tokenizers warning
ENV TOKENIZERS_PARALLELISM=false

# Install the ROCm requirements, then build vLLM in development mode.
# .git is bind-mounted for the duration of this step only.
RUN --mount=type=cache,target=${CCACHE_DIR} \
    --mount=type=bind,source=.git,target=.git \
    --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install -Ur requirements-rocm.txt \
    && python3 setup.py clean --all \
    && python3 setup.py develop

# Copy amdsmi wheel into final image
# Preemptively uninstall to avoid same-version no-installs
RUN --mount=type=bind,from=build_amdsmi,src=/install,target=/install \
    mkdir -p libs \
    && cp /install/*.whl libs \
    && python3 -m pip uninstall -y amdsmi

# Copy triton wheel(s) into final image if they were built
# Preemptively uninstall to avoid same-version no-installs
RUN --mount=type=bind,from=build_triton,src=/install,target=/install \
    mkdir -p libs \
    && if ls /install/*.whl; then \
        cp /install/*.whl libs \
        && python3 -m pip uninstall -y triton; \
    fi

# Copy flash-attn wheel(s) into final image if they were built
# Preemptively uninstall to avoid same-version no-installs
RUN --mount=type=bind,from=build_fa,src=/install,target=/install \
    mkdir -p libs \
    && if ls /install/*.whl; then \
        cp /install/*.whl libs \
        && python3 -m pip uninstall -y flash-attn; \
    fi

# Install wheels that were built to the final image
RUN --mount=type=cache,target=/root/.cache/pip \
    if ls libs/*.whl; then \
        python3 -m pip install libs/*.whl; \
    fi

CMD ["/bin/bash"]