# Rust builder
FROM lukemathwalker/cargo-chef:latest-rust-1.69 AS chef
WORKDIR /usr/src

FROM chef as planner
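# The planner stage only computes the cargo-chef "recipe" (the dependency graph);
# the builder stage below uses it to compile dependencies in a cacheable layer.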
COPY Cargo.toml Cargo.toml
COPY rust-toolchain.toml rust-toolchain.toml
COPY proto proto
COPY benchmark benchmark
COPY router router
COPY launcher launcher
RUN cargo chef prepare --recipe-path recipe.json

FROM chef AS builder

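# Build metadata, typically injected by CI (values below are illustrative), e.g.
#   docker build --build-arg GIT_SHA=$(git rev-parse HEAD) --build-arg DOCKER_LABEL=sha-abc123 .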
ARG GIT_SHA
ARG DOCKER_LABEL

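# protoc is required to compile the gRPC definitions in `proto` during the Rust build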
RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
    curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
    unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
    unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \
    rm -f $PROTOC_ZIP

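# Build dependencies only (from the recipe); this layer stays cached as long as
# the dependency set does not change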
COPY --from=planner /usr/src/recipe.json recipe.json
RUN cargo chef cook --release --recipe-path recipe.json

COPY Cargo.toml Cargo.toml
COPY rust-toolchain.toml rust-toolchain.toml
COPY proto proto
COPY benchmark benchmark
COPY router router
COPY launcher launcher
RUN cargo build --release

# Python builder
# Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile
FROM debian:bullseye-slim as pytorch-install

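# Pinned toolchain versions; they can be overridden at build time, e.g.
#   docker build --build-arg PYTORCH_VERSION=2.0.1 .   (illustrative value)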
ARG PYTORCH_VERSION=2.0.0
ARG PYTHON_VERSION=3.9
ARG CUDA_VERSION=11.8
ARG MAMBA_VERSION=23.1.0-1
ARG CUDA_CHANNEL=nvidia
ARG INSTALL_CHANNEL=pytorch
# Automatically set by buildx
ARG TARGETPLATFORM

ENV PATH /opt/conda/bin:$PATH

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        ccache \
        curl \
        git && \
        rm -rf /var/lib/apt/lists/*

# Install conda
# translating Docker's TARGETPLATFORM into mamba arches
RUN case ${TARGETPLATFORM} in \
         "linux/arm64")  MAMBA_ARCH=aarch64  ;; \
         *)              MAMBA_ARCH=x86_64   ;; \
    esac && \
    curl -fsSL -o ~/mambaforge.sh "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh"
RUN chmod +x ~/mambaforge.sh && \
    bash ~/mambaforge.sh -b -p /opt/conda && \
    rm ~/mambaforge.sh

# Install pytorch
# On arm64 we exit with an error code
RUN case ${TARGETPLATFORM} in \
         "linux/arm64")  exit 1 ;; \
         *)              /opt/conda/bin/conda update -y conda &&  \
                         /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -y "python=${PYTHON_VERSION}" pytorch==$PYTORCH_VERSION "pytorch-cuda=$(echo $CUDA_VERSION | cut -d'.' -f 1-2)"  ;; \
    esac && \
    /opt/conda/bin/conda clean -ya

# CUDA kernels builder image
FROM pytorch-install as kernel-builder

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ninja-build \
        && rm -rf /var/lib/apt/lists/*

RUN /opt/conda/bin/conda install -c "nvidia/label/cuda-11.8.0"  cuda==11.8 && \
    /opt/conda/bin/conda clean -ya


# Build Flash Attention CUDA kernels
FROM kernel-builder as flash-att-builder

WORKDIR /usr/src

COPY server/Makefile-flash-att Makefile

# Build specific version of flash attention
RUN make build-flash-attention
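# The compiled extensions land under build/lib.linux-x86_64-cpython-39 and are
# copied into the runtime image below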

# Build Transformers CUDA kernels
FROM kernel-builder as transformers-builder

WORKDIR /usr/src

COPY server/Makefile-transformers Makefile

# Build specific version of transformers
RUN BUILD_EXTENSIONS="True" make build-transformers

# Text Generation Inference base image
FROM debian:bullseye-slim as base

# Conda env
ENV PATH=/opt/conda/bin:$PATH \
    CONDA_PREFIX=/opt/conda

# Text Generation Inference base env
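# (defaults below are runtime settings; each can be overridden with `docker run -e ...`)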
ENV HUGGINGFACE_HUB_CACHE=/data \
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    MODEL_ID=bigscience/bloom-560m \
    QUANTIZE=false \
    NUM_SHARD=1 \
    PORT=80

# NVIDIA env vars
ENV CUDA_VERSION=11.8.0
ENV NVIDIA_REQUIRE_CUDA="cuda>=11.8 brand=tesla,driver>=450,driver<451 brand=tesla,driver>=470,driver<471 brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471 brand=geforce,driver>=470,driver<471 brand=geforcertx,driver>=470,driver<471 brand=quadro,driver>=470,driver<471 brand=quadrortx,driver>=470,driver<471 brand=titan,driver>=470,driver<471 brand=titanrtx,driver>=470,driver<471 brand=tesla,driver>=510,driver<511 brand=unknown,driver>=510,driver<511 brand=nvidia,driver>=510,driver<511 brand=nvidiartx,driver>=510,driver<511 brand=geforce,driver>=510,driver<511 brand=geforcertx,driver>=510,driver<511 brand=quadro,driver>=510,driver<511 brand=quadrortx,driver>=510,driver<511 brand=titan,driver>=510,driver<511 brand=titanrtx,driver>=510,driver<511 brand=tesla,driver>=515,driver<516 brand=unknown,driver>=515,driver<516 brand=nvidia,driver>=515,driver<516 brand=nvidiartx,driver>=515,driver<516 brand=geforce,driver>=515,driver<516 brand=geforcertx,driver>=515,driver<516 brand=quadro,driver>=515,driver<516 brand=quadrortx,driver>=515,driver<516 brand=titan,driver>=515,driver<516 brand=titanrtx,driver>=515,driver<516"
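# Standard NVIDIA container toolkit env vars; GPUs are exposed when the
# container is started with `docker run --gpus all ...`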
ENV NVIDIA_VISIBLE_DEVICES all
ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
# Required for nvidia-docker v1
RUN echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf && \
    echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf
ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64
ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH

LABEL com.nvidia.volumes.needed="nvidia_driver"

WORKDIR /usr/src

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        libssl-dev \
        ca-certificates \
        make \
        && rm -rf /var/lib/apt/lists/*

# Copy conda with PyTorch installed
COPY --from=pytorch-install /opt/conda /opt/conda

# Copy build artifacts from flash attention builder
COPY --from=flash-att-builder /usr/src/flash-attention/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
COPY --from=flash-att-builder /usr/src/flash-attention/csrc/layer_norm/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
COPY --from=flash-att-builder /usr/src/flash-attention/csrc/rotary/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
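# Note: the cpython-39 paths above are tied to PYTHON_VERSION=3.9 set in the
# pytorch-install stage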

# Copy build artifacts from transformers builder
COPY --from=transformers-builder /usr/src/transformers /usr/src/transformers
COPY --from=transformers-builder /usr/src/transformers/build/lib.linux-x86_64-cpython-39/transformers /usr/src/transformers/src/transformers

# Install transformers dependencies
RUN cd /usr/src/transformers && pip install -e . --no-cache-dir && pip install einops --no-cache-dir

# Install server
COPY proto proto
COPY server server
COPY server/Makefile server/Makefile
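# gen-server (defined in server/Makefile) generates the Python gRPC stubs from
# the proto definitions before the server package is installed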
RUN cd server && \
    make gen-server && \
    pip install -r requirements.txt && \
    pip install ".[bnb, accelerate]" --no-cache-dir

# Install benchmarker
COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark
# Install router
COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router
# Install launcher
COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher

# AWS SageMaker compatible image
FROM base as sagemaker
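# sagemaker-entrypoint.sh adapts SageMaker's environment conventions before
# starting the launcher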

COPY sagemaker-entrypoint.sh entrypoint.sh
RUN chmod +x entrypoint.sh

ENTRYPOINT ["./entrypoint.sh"]

# Final image
FROM base

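# Example run (illustrative tag and flags):
#   docker run --gpus all -p 8080:80 -v $PWD/data:/data \
#     ghcr.io/huggingface/text-generation-inference:latest \
#     --model-id bigscience/bloom-560m --num-shard 1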
ENTRYPOINT ["text-generation-launcher"]
CMD ["--json-output"]