Dockerfile 5.64 KB
Newer Older
1
# Rust builder
2
3
4
5
6
7
8
9
10
11
12
13
# Shared base for the Rust stages: the cargo-chef image tagged for Rust 1.67.
FROM lukemathwalker/cargo-chef:latest-rust-1.67 AS chef
WORKDIR /usr/src

# Planner stage: copy the workspace manifest, toolchain file and crates, then
# have cargo-chef write recipe.json. The builder stage copies that recipe out
# of this stage and feeds it to `cargo chef cook`.
FROM chef AS planner
COPY Cargo.toml Cargo.toml
COPY rust-toolchain.toml rust-toolchain.toml
COPY proto proto
COPY router router
COPY launcher launcher
RUN cargo chef prepare --recipe-path recipe.json

FROM chef AS builder
14
15
16
17
18
19

# Install protoc v21.12 (the binary plus its bundled include files) under /usr/local.
# NOTE: the archive name is hard-coded to the linux-x86_64 release.
# `-f` makes curl exit non-zero on an HTTP error instead of saving the error
# page under the zip's name. The archive is deleted in the same layer.
RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
    curl -fOL "https://github.com/protocolbuffers/protobuf/releases/download/v21.12/${PROTOC_ZIP}" && \
    unzip -o "${PROTOC_ZIP}" -d /usr/local bin/protoc && \
    unzip -o "${PROTOC_ZIP}" -d /usr/local 'include/*' && \
    rm -f "${PROTOC_ZIP}"
Olivier Dehaene's avatar
Olivier Dehaene committed
20

21
22
# Fetch the recipe produced by the planner stage and let cargo-chef build from it
# in release mode. This runs before the project sources are copied in, so the
# layer can be reused from cache as long as recipe.json is unchanged.
COPY --from=planner /usr/src/recipe.json recipe.json
RUN cargo chef cook --release --recipe-path recipe.json
Olivier Dehaene's avatar
Olivier Dehaene committed
23

24
COPY Cargo.toml Cargo.toml
25
COPY rust-toolchain.toml rust-toolchain.toml
Olivier Dehaene's avatar
Olivier Dehaene committed
26
27
COPY proto proto
COPY router router
Olivier Dehaene's avatar
v0.1.0  
Olivier Dehaene committed
28
COPY launcher launcher
29
RUN cargo build --release
Olivier Dehaene's avatar
v0.1.0  
Olivier Dehaene committed
30

31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# Python builder
# Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile
# Installs conda (Mambaforge) and PyTorch into /opt/conda on an Ubuntu 22.04 base.
FROM ubuntu:22.04 as pytorch-install

# Versions requested from conda when PyTorch is installed later in this stage.
# CUDA_VERSION is trimmed to its major.minor part for the `pytorch-cuda` package.
ARG PYTORCH_VERSION=2.0.0
ARG PYTHON_VERSION=3.9
ARG CUDA_VERSION=11.8
# Mambaforge installer release, used to build the download URL
ARG MAMBA_VERSION=23.1.0-1
# Conda channels passed to `conda install` with -c
ARG CUDA_CHANNEL=nvidia
ARG INSTALL_CHANNEL=pytorch
# Automatically set by buildx
ARG TARGETPLATFORM

# Build toolchain (compiler, cmake, ccache) plus curl, git and CA certificates.
# The apt package lists are removed in the same layer.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        ccache \
        cmake \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*

# Register the ccache compiler symlinks and point ccache at /opt/ccache.
RUN /usr/sbin/update-ccache-symlinks \
    && mkdir /opt/ccache \
    && ccache --set-config=cache_dir=/opt/ccache
# Put conda's bin directory first on PATH (key=value form; the space-separated
# `ENV key value` syntax is the deprecated legacy format).
ENV PATH=/opt/conda/bin:$PATH

# Install conda
# translating Docker's TARGETPLATFORM into mamba arches
# Download, run and delete the Mambaforge installer in a single layer so the
# installer script is not kept in an intermediate layer. Only `-o` is passed to
# curl: there is a single URL, so an additional `-O` has nothing to apply to.
RUN case "${TARGETPLATFORM}" in \
         "linux/arm64")  MAMBA_ARCH=aarch64  ;; \
         *)              MAMBA_ARCH=x86_64   ;; \
    esac && \
    curl -fsSL -v -o ~/mambaforge.sh "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \
    chmod +x ~/mambaforge.sh && \
    bash ~/mambaforge.sh -b -p /opt/conda && \
    rm ~/mambaforge.sh

# Install pytorch
# An arm64 target aborts the build with a non-zero exit code. Any other
# platform updates conda, installs python / pytorch / pytorch-cuda from the
# configured channels, and then cleans the conda caches.
RUN if [ "${TARGETPLATFORM}" = "linux/arm64" ]; then \
        exit 1; \
    fi && \
    /opt/conda/bin/conda update -y conda && \
    /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -y "python=${PYTHON_VERSION}" pytorch==$PYTORCH_VERSION "pytorch-cuda=$(echo $CUDA_VERSION | cut -d'.' -f 1-2)" && \
    /opt/conda/bin/conda clean -ya

# CUDA kernels builder image
# Extends the PyTorch stage with ninja and the conda `cuda` 11.8 package.
FROM pytorch-install as kernel-builder

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ninja-build \
        && rm -rf /var/lib/apt/lists/*

# -y answers conda's confirmation prompt explicitly, matching the other conda
# invocations in this file, so the step does not depend on how conda handles a
# missing stdin during a build.
RUN /opt/conda/bin/conda install -y -c "nvidia/label/cuda-11.8.0"  cuda==11.8 && \
    /opt/conda/bin/conda clean -ya


# Build Flash Attention CUDA kernels
FROM kernel-builder as flash-att-builder
Olivier Dehaene's avatar
Olivier Dehaene committed
90

91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
WORKDIR /usr/src

# Place the flash-attention Makefile from the build context at /usr/src/Makefile
COPY server/Makefile-flash-att Makefile

# Build specific version of flash attention
# (presumably the version is pinned inside the Makefile target — verify there)
RUN make build-flash-attention

# Build Transformers CUDA kernels
FROM kernel-builder as transformers-builder

WORKDIR /usr/src

# Place the transformers Makefile from the build context at /usr/src/Makefile
COPY server/Makefile-transformers Makefile

# Build specific version of transformers
# BUILD_EXTENSIONS is set only in the environment of this make invocation.
RUN BUILD_EXTENSIONS="True" make build-transformers

# Text Generation Inference base image
OlivierDehaene's avatar
OlivierDehaene committed
109
FROM debian:bullseye-slim as base
110
111
112
113
114
115
116
117
118
119
120
121
122

# Build arguments redeclared for this stage.
# NOTE(review): no instruction visible in this stage reads them — confirm before relying on or removing them.
ARG TARGETPLATFORM
ARG PYTORCH_VERSION=2.0.0
ARG CUDA_VERSION=11.8

# Conda and CUDA env
# - PATH: puts /opt/conda/bin (conda is copied into /opt/conda later in this stage) ahead of the system binaries.
# - NVIDIA_VISIBLE_DEVICES / NVIDIA_DRIVER_CAPABILITIES: presumably read by the NVIDIA container runtime — verify.
# - LD_LIBRARY_PATH: adds /usr/local/nvidia/lib and /usr/local/nvidia/lib64 to the library search path.
ENV PATH=/opt/conda/bin:$PATH \
    NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
    LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64

# Text Generation Inference base env
ENV HUGGINGFACE_HUB_CACHE=/data \
123
    HF_HUB_ENABLE_HF_TRANSFER=1 \
124
    MODEL_ID=bigscience/bloom-560m \
125
    QUANTIZE=false \
126
    NUM_SHARD=1 \
127
    PORT=80
Olivier Dehaene's avatar
Olivier Dehaene committed
128

129
LABEL com.nvidia.volumes.needed="nvidia_driver"
Olivier Dehaene's avatar
Olivier Dehaene committed
130

Olivier Dehaene's avatar
v0.1.0  
Olivier Dehaene committed
131
132
WORKDIR /usr/src

133
134
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        libssl-dev \
135
        ca-certificates \
136
137
        make \
        && rm -rf /var/lib/apt/lists/*
138

139
140
# Copy conda with PyTorch installed
COPY --from=pytorch-install /opt/conda /opt/conda
Olivier Dehaene's avatar
Olivier Dehaene committed
141

142
143
144
145
# Copy build artifacts from flash attention builder
# Three build directories (top-level, csrc/layer_norm, csrc/rotary) are merged into conda's site-packages.
# NOTE: the `lib.linux-x86_64-cpython-39` and `python3.9` path segments are hard-coded; they line up with
# the PYTHON_VERSION=3.9 default of the pytorch-install stage and must be changed together with it.
COPY --from=flash-att-builder /usr/src/flash-attention/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
COPY --from=flash-att-builder /usr/src/flash-attention/csrc/layer_norm/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
COPY --from=flash-att-builder /usr/src/flash-attention/csrc/rotary/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
Olivier Dehaene's avatar
Olivier Dehaene committed
146

147
148
149
150
151
# Copy build artifacts from transformers builder
# The whole checkout is copied first; then the `transformers` directory from the build output
# is copied over src/transformers inside that checkout.
# NOTE: the `lib.linux-x86_64-cpython-39` path segment is hard-coded (x86_64, CPython 3.9).
COPY --from=transformers-builder /usr/src/transformers /usr/src/transformers
COPY --from=transformers-builder /usr/src/transformers/build/lib.linux-x86_64-cpython-39/transformers /usr/src/transformers/src/transformers

# Install transformers dependencies
152
# Editable install of the copied transformers checkout, followed by einops; pip's cache is disabled for both.
RUN cd /usr/src/transformers \
    && pip install -e . --no-cache-dir \
    && pip install einops --no-cache-dir
153

Olivier Dehaene's avatar
Olivier Dehaene committed
154
# Install server
Nicolas Patry's avatar
Nicolas Patry committed
155
COPY proto proto
Olivier Dehaene's avatar
Olivier Dehaene committed
156
COPY server server
157
COPY server/Makefile server/Makefile
Olivier Dehaene's avatar
Olivier Dehaene committed
158
RUN cd server && \
Nicolas Patry's avatar
Nicolas Patry committed
159
    make gen-server && \
160
    pip install ".[bnb]" --no-cache-dir
Olivier Dehaene's avatar
Olivier Dehaene committed
161
162

# Install router
163
COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router
Nicolas Patry's avatar
Nicolas Patry committed
164
# Install launcher
165
COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher
Olivier Dehaene's avatar
Olivier Dehaene committed
166

167
168
169
170
171
172
173
174
# AWS SageMaker compatible image
FROM base as sagemaker

# Copy the SageMaker entrypoint script into the working directory and mark it executable.
COPY sagemaker-entrypoint.sh entrypoint.sh
RUN chmod +x entrypoint.sh

# Relative path: resolved against the WORKDIR inherited from the base stage (/usr/src).
ENTRYPOINT ["./entrypoint.sh"]

175
# Final image
176
177
FROM base

178
179
# Start the launcher binary that the base stage installs into /usr/local/bin.
# CMD supplies the default argument; arguments passed to `docker run` replace it.
ENTRYPOINT ["text-generation-launcher"]
CMD ["--json-output"]