OpenDAS / text-generation-inference / Commits / 2b3bd1e0

Unverified commit 2b3bd1e0, authored Jul 03, 2024 by Nicolas Patry, committed by GitHub on Jul 03, 2024

Fixing the dockerfile warnings. (#2173)

Parent: be4a4c47
Showing 3 changed files with 28 additions and 28 deletions (+28 / -28):

Dockerfile         +16 / -16
Dockerfile_amd      +8 /  -8
Dockerfile_intel    +4 /  -4
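The warnings in question almost certainly come from BuildKit's FromAsCasing check, which flags multi-stage `FROM ... as <stage>` lines whose `as` keyword does not match the casing of `FROM`. Every change below simply uppercases `as` to `AS` (plus one comment in Dockerfile_amd caught by the same replacement). As a minimal sketch of the pattern, not taken from this commit and using made-up stage names:

    # Lowercase `as` here would make BuildKit print a FromAsCasing warning during the build;
    # matching the casing of FROM keeps the output clean.
    FROM ubuntu:22.04 AS builder
    RUN echo "built" > /artifact

    FROM ubuntu:22.04 AS runtime
    COPY --from=builder /artifact /artifact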
Dockerfile

@@ -4,7 +4,7 @@ WORKDIR /usr/src
 ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
-FROM chef as planner
+FROM chef AS planner
 COPY Cargo.lock Cargo.lock
 COPY Cargo.toml Cargo.toml
 COPY rust-toolchain.toml rust-toolchain.toml

@@ -38,7 +38,7 @@ RUN cargo build --profile release-opt
 # Python builder
 # Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile
-FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 as pytorch-install
+FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 AS pytorch-install
 ARG PYTORCH_VERSION=2.3.0
 ARG PYTHON_VERSION=3.10

@@ -81,7 +81,7 @@ RUN case ${TARGETPLATFORM} in \
     /opt/conda/bin/conda clean -ya
 # CUDA kernels builder image
-FROM pytorch-install as kernel-builder
+FROM pytorch-install AS kernel-builder
 ARG MAX_JOBS=8

@@ -90,7 +90,7 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-ins
         && rm -rf /var/lib/apt/lists/*
 # Build Flash Attention CUDA kernels
-FROM kernel-builder as flash-att-builder
+FROM kernel-builder AS flash-att-builder
 WORKDIR /usr/src

@@ -100,7 +100,7 @@ COPY server/Makefile-flash-att Makefile
 RUN make build-flash-attention
 # Build Flash Attention v2 CUDA kernels
-FROM kernel-builder as flash-att-v2-builder
+FROM kernel-builder AS flash-att-v2-builder
 WORKDIR /usr/src

@@ -110,14 +110,14 @@ COPY server/Makefile-flash-att-v2 Makefile
 RUN make build-flash-attention-v2-cuda
 # Build Transformers exllama kernels
-FROM kernel-builder as exllama-kernels-builder
+FROM kernel-builder AS exllama-kernels-builder
 WORKDIR /usr/src
 COPY server/exllama_kernels/ .
 RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" python setup.py build
 # Build Transformers exllama kernels
-FROM kernel-builder as exllamav2-kernels-builder
+FROM kernel-builder AS exllamav2-kernels-builder
 WORKDIR /usr/src
 COPY server/exllamav2_kernels/ .

@@ -125,42 +125,42 @@ COPY server/exllamav2_kernels/ .
 RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" python setup.py build
 # Build Transformers awq kernels
-FROM kernel-builder as awq-kernels-builder
+FROM kernel-builder AS awq-kernels-builder
 WORKDIR /usr/src
 COPY server/Makefile-awq Makefile
 # Build specific version of transformers
 RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" make build-awq
 # Build eetq kernels
-FROM kernel-builder as eetq-kernels-builder
+FROM kernel-builder AS eetq-kernels-builder
 WORKDIR /usr/src
 COPY server/Makefile-eetq Makefile
 # Build specific version of transformers
 RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" make build-eetq
 # Build marlin kernels
-FROM kernel-builder as marlin-kernels-builder
+FROM kernel-builder AS marlin-kernels-builder
 WORKDIR /usr/src
 COPY server/marlin/ .
 # Build specific version of transformers
 RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" python setup.py build
 # Build Lorax Punica kernels
-FROM kernel-builder as lorax-punica-builder
+FROM kernel-builder AS lorax-punica-builder
 WORKDIR /usr/src
 COPY server/Makefile-lorax-punica Makefile
 # Build specific version of transformers
 RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" make build-lorax-punica
 # Build Transformers CUDA kernels
-FROM kernel-builder as custom-kernels-builder
+FROM kernel-builder AS custom-kernels-builder
 WORKDIR /usr/src
 COPY server/custom_kernels/ .
 # Build specific version of transformers
 RUN python setup.py build
 # Build vllm CUDA kernels
-FROM kernel-builder as vllm-builder
+FROM kernel-builder AS vllm-builder
 WORKDIR /usr/src

@@ -172,13 +172,13 @@ COPY server/Makefile-vllm Makefile
 RUN make build-vllm-cuda
 # Build mamba kernels
-FROM kernel-builder as mamba-builder
+FROM kernel-builder AS mamba-builder
 WORKDIR /usr/src
 COPY server/Makefile-selective-scan Makefile
 RUN make build-all
 # Text Generation Inference base image
-FROM nvidia/cuda:12.1.0-base-ubuntu22.04 as base
+FROM nvidia/cuda:12.1.0-base-ubuntu22.04 AS base
 # Conda env
 ENV PATH=/opt/conda/bin:$PATH \

@@ -260,7 +260,7 @@ COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/lo
 # AWS Sagemaker compatible image
-FROM base as sagemaker
+FROM base AS sagemaker
 COPY sagemaker-entrypoint.sh entrypoint.sh
 RUN chmod +x entrypoint.sh
Dockerfile_amd

@@ -4,7 +4,7 @@ WORKDIR /usr/src
 ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
-FROM chef as planner
+FROM chef AS planner
 COPY Cargo.lock Cargo.lock
 COPY Cargo.toml Cargo.toml
 COPY rust-toolchain.toml rust-toolchain.toml

@@ -37,7 +37,7 @@ COPY launcher launcher
 RUN cargo build --profile release-opt
 # Text Generation Inference base image for RoCm
-FROM rocm/dev-ubuntu-22.04:6.1.1_hip_update as base
+FROM rocm/dev-ubuntu-22.04:6.1.1_hip_update AS base
 RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
     build-essential \

@@ -115,7 +115,7 @@ ARG BUILD_CAFFE2="0" \
 RUN cd pytorch && python tools/amd_build/build_amd.py && python setup.py install
-# Set as recommended: https://github.com/ROCm/triton/wiki/A-script-to-set-program-execution-environment-in-ROCm
+# Set AS recommended: https://github.com/ROCm/triton/wiki/A-script-to-set-program-execution-environment-in-ROCm
 ENV HIP_FORCE_DEV_KERNARG=1
 # On MI250 and MI300, performances for flash with Triton FA are slightly better than CK.

@@ -143,26 +143,26 @@ COPY server/Makefile-flash-att-v2 Makefile
 RUN make build-flash-attention-v2-rocm
 # Build Transformers CUDA kernels (gpt-neox and bloom)
-FROM kernel-builder as custom-kernels-builder
+FROM kernel-builder AS custom-kernels-builder
 WORKDIR /usr/src
 COPY server/custom_kernels/ .
 RUN python setup.py build
 # Build exllama kernels
-FROM kernel-builder as exllama-kernels-builder
+FROM kernel-builder AS exllama-kernels-builder
 WORKDIR /usr/src
 COPY server/exllama_kernels/ .
 RUN python setup.py build
 # Build exllama v2 kernels
-FROM kernel-builder as exllamav2-kernels-builder
+FROM kernel-builder AS exllamav2-kernels-builder
 WORKDIR /usr/src
 COPY server/exllamav2_kernels/ .
 RUN python setup.py build
-FROM base as base-copy
+FROM base AS base-copy
 # Text Generation Inference base env
 ENV HUGGINGFACE_HUB_CACHE=/data \

@@ -201,7 +201,7 @@ COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/loca
 COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher
 # AWS Sagemaker compatible image
-FROM base as sagemaker
+FROM base AS sagemaker
 COPY sagemaker-entrypoint.sh entrypoint.sh
 RUN chmod +x entrypoint.sh
Dockerfile_intel

@@ -5,7 +5,7 @@ WORKDIR /usr/src
 ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
-FROM chef as planner
+FROM chef AS planner
 COPY Cargo.lock Cargo.lock
 COPY Cargo.toml Cargo.toml
 COPY rust-toolchain.toml rust-toolchain.toml

@@ -40,7 +40,7 @@ RUN cargo build --profile release-opt
 # Text Generation Inference base image for Intel
-FROM intel/intel-extension-for-pytorch:2.1.30-xpu as xpu
+FROM intel/intel-extension-for-pytorch:2.1.30-xpu AS xpu
 USER root
 # libssl.so.1.1 is not installed on Ubuntu 22.04 by default, install it

@@ -95,7 +95,7 @@ COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/lo
 # Text Generation Inference base image for Intel-cpu
-FROM ubuntu:22.04 as cpu
+FROM ubuntu:22.04 AS cpu
 RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
     curl \

@@ -172,6 +172,6 @@ COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/loca
 # Install launcher
 COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher
-FROM ${PLATFORM} as final
+FROM ${PLATFORM} AS final
 ENTRYPOINT ["text-generation-launcher"]
 CMD ["--json-output"]
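To verify the fix, rebuilding any of these images should no longer print FromAsCasing warnings in the BuildKit output. On a Docker/Buildx version recent enough to support build checks (an assumption about the local toolchain, not something this commit requires), the Dockerfiles can also be linted without running the build, for example:

    # Run BuildKit's built-in checks (including FromAsCasing) without executing the build.
    docker build --check -f Dockerfile .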