Dockerfile_amd 7.14 KB
Newer Older
fxmarty's avatar
fxmarty committed
1
# Rust builder
2
FROM lukemathwalker/cargo-chef:latest-rust-1.80 AS chef
fxmarty's avatar
fxmarty committed
3
4
5
6
# Working directory for the chef stage; later stages copy recipe.json and the
# release binaries from under /usr/src.
WORKDIR /usr/src

# Selects cargo's sparse protocol for the crates.io registry wherever this
# build argument is in scope.
ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse

7
FROM chef AS planner
ur4t's avatar
ur4t committed
8
COPY Cargo.lock Cargo.lock
fxmarty's avatar
fxmarty committed
9
10
11
12
13
COPY Cargo.toml Cargo.toml
COPY rust-toolchain.toml rust-toolchain.toml
COPY proto proto
COPY benchmark benchmark
COPY router router
Nicolas Patry's avatar
Nicolas Patry committed
14
COPY backends backends
fxmarty's avatar
fxmarty committed
15
16
17
18
19
COPY launcher launcher
RUN cargo chef prepare --recipe-path recipe.json

FROM chef AS builder

Nicolas Patry's avatar
Nicolas Patry committed
20
21
# Install the Python 3.11 development package before the Rust build.
# The apt package lists are removed in the same RUN so they are not stored in
# this layer.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    python3.11-dev && \
    rm -rf /var/lib/apt/lists/*
fxmarty's avatar
fxmarty committed
22
23
24
25
26
27
28
# Download the protoc 21.12 release archive and unpack the compiler binary and
# its include files under /usr/local, then delete the archive in the same layer.
# `-f` makes curl exit non-zero on an HTTP error instead of saving the error
# page under the zip's name.
RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
    curl -fOL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
    unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
    unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \
    rm -f $PROTOC_ZIP

COPY --from=planner /usr/src/recipe.json recipe.json
29
RUN cargo chef cook --profile release-opt --recipe-path recipe.json
fxmarty's avatar
fxmarty committed
30

Nicolas Patry's avatar
Nicolas Patry committed
31
32
33
ARG GIT_SHA
ARG DOCKER_LABEL

fxmarty's avatar
fxmarty committed
34
35
36
37
38
COPY Cargo.toml Cargo.toml
COPY rust-toolchain.toml rust-toolchain.toml
COPY proto proto
COPY benchmark benchmark
COPY router router
Nicolas Patry's avatar
Nicolas Patry committed
39
COPY backends backends
fxmarty's avatar
fxmarty committed
40
COPY launcher launcher
41
RUN cargo build --profile release-opt
fxmarty's avatar
fxmarty committed
42
43

# Text Generation Inference base image for RoCm
44
FROM rocm/dev-ubuntu-22.04:6.1.1_hip_update AS base
fxmarty's avatar
fxmarty committed
45
46
47
48
49
50
51
52
53
54
55
56
57

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    ccache \
    curl \
    git \
    make \
    libssl-dev \
    g++ \
    # Needed to build VLLM & flash.
    rocthrust-dev \
    hipsparse-dev \
fxmarty's avatar
fxmarty committed
58
59
60
61
62
63
64
65
66
67
68
    hipblas-dev \
    hipblaslt-dev \
    rocblas-dev \
    hiprand-dev \
    rocrand-dev \
    miopen-hip-dev \
    hipfft-dev \
    hipcub-dev \
    hipsolver-dev \
    rccl-dev \
    cmake \
Nicolas Patry's avatar
Nicolas Patry committed
69
    python3.11-dev && \
fxmarty's avatar
fxmarty committed
70
71
72
73
    rm -rf /var/lib/apt/lists/*

# Keep in sync with `server/pyproject.toml`
ARG MAMBA_VERSION=23.1.0-1
fxmarty's avatar
fxmarty committed
74
75
# NOTE(review): neither PYTORCH_VERSION nor ROCM_VERSION is referenced in the
# visible part of this file, and ROCM_VERSION (6.0.2) differs from the 6.1.1 tag
# of the rocm/dev-ubuntu base image — confirm whether they are still needed.
ARG PYTORCH_VERSION='2.3.0'
ARG ROCM_VERSION='6.0.2'
Nicolas Patry's avatar
Nicolas Patry committed
76
ARG PYTHON_VERSION='3.11.10'
fxmarty's avatar
fxmarty committed
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# Automatically set by buildx
ARG TARGETPLATFORM
# Put the conda installation first on PATH. Uses the key=value form; the
# space-separated `ENV key value` syntax is deprecated.
ENV PATH=/opt/conda/bin:$PATH

# TGI seems to require libssl.so.1.1 instead of libssl.so.3 so we can't use ubuntu 22.04. Ubuntu 20.04 has python==3.8, and TGI requires python>=3.9, hence the need for miniconda.
# NOTE(review): this stage is built FROM a rocm/dev-ubuntu-22.04 image, so the remark about 22.04 above looks stale — confirm.
# Install mamba, translating Docker's TARGETPLATFORM into mamba arches.
# The download, installation and removal of the installer happen in a single
# RUN so the installer script is not kept in an earlier layer.
RUN case ${TARGETPLATFORM} in \
         "linux/arm64")  MAMBA_ARCH=aarch64  ;; \
         *)              MAMBA_ARCH=x86_64   ;; \
    esac && \
    curl -fsSL -v -o ~/mambaforge.sh "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \
    chmod +x ~/mambaforge.sh && \
    bash ~/mambaforge.sh -b -p /opt/conda && \
    mamba init && \
    rm ~/mambaforge.sh

Nicolas Patry's avatar
Nicolas Patry committed
94
95
96
97
98
99
100
101
102
# RUN conda install intel::mkl-static intel::mkl-include
# Update conda, install the pinned Python interpreter, then clean conda's caches.
# arm64 is rejected up front: the build stops with a non-zero exit code.
RUN if [ "${TARGETPLATFORM}" = "linux/arm64" ]; then exit 1; fi && \
    /opt/conda/bin/conda update -y conda && \
    /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" && \
    /opt/conda/bin/conda clean -ya
fxmarty's avatar
fxmarty committed
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# Install flash-attention, torch dependencies
RUN pip install numpy einops ninja --no-cache-dir

# Remove any installed triton and install the ROCm fork from a shallow clone.
# --no-cache-dir keeps pip's cache out of the layer, matching the other pip steps.
RUN pip uninstall -y triton && \
    git clone --depth 1 --single-branch https://github.com/ROCm/triton.git && \
    cd triton/python && \
    pip install . --no-cache-dir

# Shallow-clone the 2.3-patched PyTorch branch (with submodules) and install its Python requirements.
RUN git clone --depth 1 --recursive --single-branch --branch 2.3-patched https://github.com/fxmarty/pytorch.git pytorch && cd pytorch && pip install -r requirements.txt --no-cache-dir

# Build-time settings declared ahead of the PyTorch source build below.
# ARG values are exposed to RUN instructions as environment variables.
ARG _GLIBCXX_USE_CXX11_ABI="1"
ARG CMAKE_PREFIX_PATH="/opt/conda"
ARG PYTORCH_ROCM_ARCH="gfx90a;gfx942"
ARG BUILD_CAFFE2="0"
ARG BUILD_CAFFE2_OPS="0"
ARG USE_CUDA="0"
ARG USE_ROCM="1"
ARG BUILD_TEST="0"
ARG USE_FBGEMM="0"
ARG USE_NNPACK="0"
ARG USE_QNNPACK="0"
ARG USE_XNNPACK="0"
ARG USE_FLASH_ATTENTION="1"
ARG USE_MEM_EFF_ATTENTION="0"

# Run the AMD build script on the cloned sources, then build and install PyTorch.
RUN cd pytorch && \
    python tools/amd_build/build_amd.py && \
    python setup.py install

130
# Set as recommended: https://github.com/ROCm/triton/wiki/A-script-to-set-program-execution-environment-in-ROCm
fxmarty's avatar
fxmarty committed
131
132
ENV HIP_FORCE_DEV_KERNARG=1

133
134
135
# On MI250 and MI300, flash attention performance with Triton FA is slightly better than with CK.
# However, Triton requires tuning for each prompt length, which is prohibitive.
ENV ROCM_USE_FLASH_ATTN_V2_TRITON=0
fxmarty's avatar
fxmarty committed
136
137
138

FROM base AS kernel-builder

fxmarty's avatar
fxmarty committed
139
# # Build vllm kernels
fxmarty's avatar
fxmarty committed
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# vLLM build stage: runs the ROCm target of server/Makefile-vllm under /usr/src.
# The runtime stage later copies its output from /usr/src/vllm/build.
FROM kernel-builder AS vllm-builder
WORKDIR /usr/src

# Only the vLLM Makefile is copied, installed here as `Makefile`.
COPY server/Makefile-vllm Makefile

# Build specific version of vllm
RUN make build-vllm-rocm

# Build Flash Attention v2 kernels
# The runtime stage later copies the output from /usr/src/flash-attention-v2/build.
FROM kernel-builder AS flash-att-v2-builder
WORKDIR /usr/src

# Only the flash-attention v2 Makefile is copied, installed here as `Makefile`.
COPY server/Makefile-flash-att-v2 Makefile

# Build specific version of flash attention v2
RUN make build-flash-attention-v2-rocm

# Build Transformers CUDA kernels (gpt-neox and bloom)
158
FROM kernel-builder AS custom-kernels-builder
fxmarty's avatar
fxmarty committed
159
160
WORKDIR /usr/src
COPY server/custom_kernels/ .
fxmarty's avatar
fxmarty committed
161
RUN python setup.py build
fxmarty's avatar
fxmarty committed
162

fxmarty's avatar
fxmarty committed
163
# Build exllama kernels
164
FROM kernel-builder AS exllama-kernels-builder
fxmarty's avatar
fxmarty committed
165
166
167
WORKDIR /usr/src
COPY server/exllama_kernels/ .

fxmarty's avatar
fxmarty committed
168
RUN python setup.py build
fxmarty's avatar
fxmarty committed
169
170

# Build exllama v2 kernels
171
FROM kernel-builder AS exllamav2-kernels-builder
fxmarty's avatar
fxmarty committed
172
173
174
WORKDIR /usr/src
COPY server/exllamav2_kernels/ .

fxmarty's avatar
fxmarty committed
175
RUN python setup.py build
fxmarty's avatar
fxmarty committed
176

177
FROM base AS base-copy
fxmarty's avatar
fxmarty committed
178
179

# Text Generation Inference base env
180
ENV HF_HOME=/data \
fxmarty's avatar
fxmarty committed
181
182
183
184
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    PORT=80

# Copy builds artifacts from vllm builder
Nicolas Patry's avatar
Nicolas Patry committed
185
COPY --from=vllm-builder /usr/src/vllm/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
fxmarty's avatar
fxmarty committed
186
187

# Copy build artifacts from flash attention v2 builder
Nicolas Patry's avatar
Nicolas Patry committed
188
COPY --from=flash-att-v2-builder /usr/src/flash-attention-v2/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
fxmarty's avatar
fxmarty committed
189
190

# Copy build artifacts from custom kernels builder
Nicolas Patry's avatar
Nicolas Patry committed
191
COPY --from=custom-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
fxmarty's avatar
fxmarty committed
192
193

# Copy build artifacts from exllama kernels builder
Nicolas Patry's avatar
Nicolas Patry committed
194
COPY --from=exllama-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
fxmarty's avatar
fxmarty committed
195
196

# Copy build artifacts from exllamav2 kernels builder
Nicolas Patry's avatar
Nicolas Patry committed
197
COPY --from=exllamav2-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-311 /opt/conda/lib/python3.11/site-packages
fxmarty's avatar
fxmarty committed
198
199
200
201
202
203
204
205

# Install server
# The proto definitions and the server sources are copied into the image; the
# RUN below generates the server code with `make gen-server` and installs the
# Python requirements.
COPY proto proto
COPY server server
COPY server/Makefile server/Makefile
# --no-cache-dir is passed to every pip call in this step so pip's cache is not
# stored in the layer.
RUN cd server && \
    make gen-server && \
    pip install -r requirements_rocm.txt --no-cache-dir && \
OlivierDehaene's avatar
OlivierDehaene committed
206
    pip install ".[accelerate, peft, outlines]" --no-cache-dir
fxmarty's avatar
fxmarty committed
207
208

# Install benchmarker
209
COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark
fxmarty's avatar
fxmarty committed
210
# Install router
211
COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/local/bin/text-generation-router
fxmarty's avatar
fxmarty committed
212
# Install launcher
213
COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher
Nicolas Patry's avatar
Nicolas Patry committed
214
# Append conda's lib directory to the dynamic library search path.
# The ${VAR:+...} form only emits "<old value>:" when LD_LIBRARY_PATH is already
# set, which avoids a leading ":" (an empty entry, i.e. the current directory).
ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/opt/conda/lib/"
fxmarty's avatar
fxmarty committed
215
216

# AWS Sagemaker compatible image
217
# Built on base-copy rather than base so this target also contains the server
# package, the compiled kernels and the benchmark/router/launcher binaries that
# are installed in base-copy; the plain `base` stage has none of them.
# NOTE(review): assumes sagemaker-entrypoint.sh starts those binaries — confirm.
FROM base-copy AS sagemaker
fxmarty's avatar
fxmarty committed
218

fxmarty's avatar
fxmarty committed
219
220
221
222
223
224
225
226
# Copy the SageMaker entrypoint script into the stage's working directory as
# entrypoint.sh and mark it executable.
COPY sagemaker-entrypoint.sh entrypoint.sh
RUN chmod +x entrypoint.sh

# Exec-form entrypoint; the relative path is resolved against the working
# directory of the container.
ENTRYPOINT ["./entrypoint.sh"]

# Final image
FROM base-copy

fxmarty's avatar
fxmarty committed
227
228
229
230
# Copy the TGI entrypoint script to the filesystem root and mark it executable.
COPY ./tgi-entrypoint.sh /tgi-entrypoint.sh
RUN chmod +x /tgi-entrypoint.sh

# Exec-form entrypoint, referenced by absolute path.
ENTRYPOINT ["/tgi-entrypoint.sh"]
fxmarty's avatar
fxmarty committed
231
CMD ["--json-output"]