Dockerfile_intel 9.61 KB
Newer Older
Wang, Yi's avatar
Wang, Yi committed
1
2
ARG PLATFORM=xpu

3
FROM lukemathwalker/cargo-chef:latest-rust-1.80.1 AS chef
4
5
6
7
WORKDIR /usr/src

ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse

8
FROM chef AS planner
ur4t's avatar
ur4t committed
9
COPY Cargo.lock Cargo.lock
10
11
12
13
14
COPY Cargo.toml Cargo.toml
COPY rust-toolchain.toml rust-toolchain.toml
COPY proto proto
COPY benchmark benchmark
COPY router router
Nicolas Patry's avatar
Nicolas Patry committed
15
COPY backends backends
16
17
18
19
20
COPY launcher launcher
RUN cargo chef prepare --recipe-path recipe.json

FROM chef AS builder

Nicolas Patry's avatar
Nicolas Patry committed
21
22
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    python3.11-dev
23
24
25
26
27
28
29
RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
    curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
    unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
    unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \
    rm -f $PROTOC_ZIP

COPY --from=planner /usr/src/recipe.json recipe.json
30
RUN cargo chef cook --profile release-opt --recipe-path recipe.json
31

Nicolas Patry's avatar
Nicolas Patry committed
32
33
34
ARG GIT_SHA
ARG DOCKER_LABEL

35
COPY Cargo.lock Cargo.lock
36
37
38
39
40
COPY Cargo.toml Cargo.toml
COPY rust-toolchain.toml rust-toolchain.toml
COPY proto proto
COPY benchmark benchmark
COPY router router
Nicolas Patry's avatar
Nicolas Patry committed
41
COPY backends backends
42
COPY launcher launcher
43
RUN cargo build --profile release-opt --frozen
44
45
46


# Text Generation Inference base image for Intel
Wang, Yi's avatar
Wang, Yi committed
47

48
FROM intel/intel-extension-for-pytorch:2.3.110-xpu AS xpu
49
50

USER root
51
52
53
54
55

ARG MAMBA_VERSION=23.1.0-1
ARG PYTHON_VERSION='3.11.10'
# Automatically set by buildx
ARG TARGETPLATFORM
56
ENV PATH=/opt/conda/bin:$PATH
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76

# TGI seem to require libssl.so.1.1 instead of libssl.so.3 so we can't use ubuntu 22.04. Ubuntu 20.04 has python==3.8, and TGI requires python>=3.9, hence the need for miniconda.
# Install mamba
# translating Docker's TARGETPLATFORM into mamba arches
RUN case ${TARGETPLATFORM} in \
         "linux/arm64")  MAMBA_ARCH=aarch64  ;; \
         *)              MAMBA_ARCH=x86_64   ;; \
    esac && \
    curl -fsSL -v -o ~/mambaforge.sh -O  "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh"
RUN chmod +x ~/mambaforge.sh && \
    bash ~/mambaforge.sh -b -p /opt/conda && \
    rm ~/mambaforge.sh

RUN case ${TARGETPLATFORM} in \
         "linux/arm64")  exit 1 ;; \
         *)              /opt/conda/bin/conda update -y conda &&  \
                         /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
    esac && \
    /opt/conda/bin/conda clean -ya

77
78
79
80
# libssl.so.1.1 is not installed on Ubuntu 22.04 by default, install it
RUN wget http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_amd64.deb && \
    dpkg -i ./libssl1.1_1.1.1f-1ubuntu2_amd64.deb

Wang, Yi's avatar
Wang, Yi committed
81
RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | gpg --dearmor | tee /usr/share/keyrings/intel-graphics.gpg > /dev/null
82
83
84
85

RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
| gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list

86
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt install -y intel-basekit xpu-smi cmake ninja-build pciutils
87
88

# Text Generation Inference base env
89
ENV HF_HOME=/data \
90
91
92
93
94
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    PORT=80


WORKDIR /usr/src
95
RUN pip install torch==2.3.1+cxx11.abi torchvision==0.18.1+cxx11.abi torchaudio==2.3.1+cxx11.abi intel-extension-for-pytorch==2.3.110+xpu oneccl_bind_pt==2.3.100+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ --no-cache-dir
96
RUN pip install triton-xpu==3.0.0b2 --no-cache-dir
97
98
99
100
101
102
103

# Install server
COPY proto proto
COPY server server
COPY server/Makefile server/Makefile
RUN cd server && \
    make gen-server && \
104
    pip install -r requirements_intel.txt && \
105
106
107
108
109
110
    pip install ".[accelerate, peft, outlines]" --no-cache-dir

ENV CCL_ROOT=/opt/intel/oneapi/ccl/latest
ENV I_MPI_ROOT=/opt/intel/oneapi/mpi/latest
ENV FI_PROVIDER_PATH=/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/lib/prov:/usr/lib/x86_64-linux-gnu/libfabric
ENV LIBRARY_PATH=/opt/intel/oneapi/mpi/latest/lib:/opt/intel/oneapi/ccl/latest/lib/:/opt/intel/oneapi/mkl/latest/lib/:/opt/intel/oneapi/compiler/latest/lib
111
112
ENV LD_LIBRARY_PATH=/opt/intel/oneapi/ccl/latest/lib/:/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/lib:/opt/intel/oneapi/mpi/latest/lib:/opt/intel/oneapi/mkl/latest/lib:/opt/intel/oneapi/compiler/latest/opt/compiler/lib:/opt/intel/oneapi/compiler/latest/lib:/opt/intel/oneapi/lib:/opt/intel/oneapi/lib/intel64:/opt/conda/lib
ENV PATH=/opt/conda/bin:/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/bin:/opt/intel/oneapi/mpi/latest/bin:/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/bin:/opt/intel/oneapi/mkl/latest/bin/:/opt/intel/oneapi/compiler/latest/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
113
ENV CCL_ZE_IPC_EXCHANGE=sockets
Wang, Yi's avatar
Wang, Yi committed
114
115
ENV CMAKE_PREFIX_PATH=/opt/intel/oneapi/mkl/latest/lib/cmake:/opt/intel/oneapi/compiler/latest
ENV CPATH=/opt/intel/oneapi/mpi/latest/include:/opt/intel/oneapi/ccl/latest/include:/opt/intel/oneapi/mkl/latest/include
116
117
ENV TORCH_LLM_ALLREDUCE=1
ENV CCL_TOPO_FABRIC_VERTEX_CONNECTION_CHECK=0
Wang, Yi's avatar
Wang, Yi committed
118

119
# Install benchmarker
120
COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark
121
# Install router
122
COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/local/bin/text-generation-router
123
# Install launcher
124
COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher
125
126


Wang, Yi's avatar
Wang, Yi committed
127
# Text Generation Inference base image for Intel-cpu
128
FROM ubuntu:22.04 AS cpu
Wang, Yi's avatar
Wang, Yi committed
129
130
131
132
133

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    curl \
    ca-certificates \
    make \
134
135
    g++-12 \
    gcc-12 \
Wang, Yi's avatar
Wang, Yi committed
136
137
    git \
    wget \
138
139
    cmake \
    libnuma-dev
Wang, Yi's avatar
Wang, Yi committed
140

141
142
143
144
145
146
147
148
149
RUN update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-12 12
RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 12
RUN update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 30
RUN update-alternatives --set cc /usr/bin/gcc

RUN update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 30
RUN update-alternatives --set c++ /usr/bin/g++


Wang, Yi's avatar
Wang, Yi committed
150
151
152
153
154
ENV HUGGINGFACE_HUB_CACHE=/data \
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    PORT=80

ARG MAMBA_VERSION=23.1.0-1
Nicolas Patry's avatar
Nicolas Patry committed
155
ARG PYTHON_VERSION='3.11.10'
Wang, Yi's avatar
Wang, Yi committed
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# Automatically set by buildx
ARG TARGETPLATFORM
ENV PATH /opt/conda/bin:$PATH

# TGI seem to require libssl.so.1.1 instead of libssl.so.3 so we can't use ubuntu 22.04. Ubuntu 20.04 has python==3.8, and TGI requires python>=3.9, hence the need for miniconda.
# Install mamba
# translating Docker's TARGETPLATFORM into mamba arches
RUN case ${TARGETPLATFORM} in \
         "linux/arm64")  MAMBA_ARCH=aarch64  ;; \
         *)              MAMBA_ARCH=x86_64   ;; \
    esac && \
    curl -fsSL -v -o ~/mambaforge.sh -O  "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh"
RUN chmod +x ~/mambaforge.sh && \
    bash ~/mambaforge.sh -b -p /opt/conda && \
    rm ~/mambaforge.sh

172
173
174
175
176
177
178
RUN case ${TARGETPLATFORM} in \
         "linux/arm64")  exit 1 ;; \
         *)              /opt/conda/bin/conda update -y conda &&  \
                         /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
    esac && \
    /opt/conda/bin/conda clean -ya

Wang, Yi's avatar
Wang, Yi committed
179
180
RUN conda install -c conda-forge gperftools mkl

181
182
183
184
185

RUN pip install https://download.pytorch.org/whl/nightly/cpu/torch-2.5.0.dev20240815%2Bcpu-cp311-cp311-linux_x86_64.whl
RUN pip install https://download.pytorch.org/whl/nightly/cpu/torchvision-0.20.0.dev20240815%2Bcpu-cp311-cp311-linux_x86_64.whl
RUN pip install https://download.pytorch.org/whl/nightly/cpu/torchaudio-2.4.0.dev20240815%2Bcpu-cp311-cp311-linux_x86_64.whl

186
RUN pip install triton py-libnuma
Wang, Yi's avatar
Wang, Yi committed
187
188
189

WORKDIR /usr/src

190
191
RUN git clone https://github.com/intel/intel-extension-for-pytorch && cd intel-extension-for-pytorch && git checkout f86e93e4890dc2c989024d148d415c9aa8a1649f
RUN git clone https://github.com/intel/torch-ccl.git && cd torch-ccl && git checkout v2.4.0+cpu+rc0
Wang, Yi's avatar
Wang, Yi committed
192
193
194
195
196

RUN cd intel-extension-for-pytorch && git submodule sync && git submodule update --init --recursive && python setup.py install

RUN cd torch-ccl && git submodule sync && git submodule update --init --recursive && pip install .

197
ENV LD_PRELOAD=/opt/conda/lib/libtcmalloc.so
198
199
200
201
ENV CCL_ROOT=/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch
ENV I_MPI_ROOT=/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch
ENV FI_PROVIDER_PATH=/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib/prov:/usr/lib64/libfabric
ENV LD_LIBRARY_PATH=/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib:/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch/lib
Nicolas Patry's avatar
Nicolas Patry committed
202
ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/conda/lib/"
Wang, Yi's avatar
Wang, Yi committed
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219

# Install server
COPY proto proto
COPY server server
COPY server/Makefile server/Makefile
RUN cd server && \
    make gen-server && \
    pip install -r requirements_intel.txt && \
    pip install ".[accelerate, peft, outlines]" --no-cache-dir

# Install benchmarker
COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark
# Install router
COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/local/bin/text-generation-router
# Install launcher
COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher

220
FROM ${PLATFORM} AS final
221
ENV ATTENTION=paged
222
223
ENV PREFIX_CACHING=0
ENV PREFILL_CHUNKING=0
224
ENV CUDA_GRAPHS=0
225
226
ENTRYPOINT ["text-generation-launcher"]
CMD ["--json-output"]