# Rust builder FROM image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.1.0-centos7.6-dtk24.04-py310 as chef RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y ENV PATH /root/.cargo/bin:$PATH RUN cargo install cargo-chef WORKDIR /usr/src ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse FROM chef as planner COPY Cargo.toml Cargo.toml COPY rust-toolchain.toml rust-toolchain.toml COPY proto proto COPY benchmark benchmark COPY router router COPY launcher launcher RUN cargo chef prepare --recipe-path recipe.json FROM chef AS builder ARG GIT_SHA ARG DOCKER_LABEL RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ rm -f $PROTOC_ZIP COPY --from=planner /usr/src/recipe.json recipe.json RUN cargo chef cook --release --recipe-path recipe.json COPY Cargo.toml Cargo.toml COPY rust-toolchain.toml rust-toolchain.toml COPY proto proto COPY benchmark benchmark COPY router router COPY launcher launcher RUN cargo build --release # Text Generation Inference base image for RoCm FROM image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.1.0-centos7.6-dtk24.04-py310 as base # Need hyhal while compiling WORKDIR /opt RUN wget https://cancon.hpccube.com:65024/directlink/1/DTK-23.10.1/hyhal.tar.gz && \ tar -xzf hyhal.tar.gz -C /opt ENV LD_LIBRARY_PATH /opt/hyhal/lib:/opt/hyhal/lib64:$LD_LIBRARY_PATH ENV PYTHONPATH /usr/local/lib/python3.10/site-packages:$PYTHONPATH FROM base AS kernel-builder # Build vllm kernels FROM kernel-builder AS vllm-builder WORKDIR /usr/src COPY server/vllm/ . # Build specific version of vllm RUN python setup.py build # Build Transformers CUDA kernels (gpt-neox and bloom) FROM kernel-builder as custom-kernels-builder WORKDIR /usr/src COPY server/custom_kernels/ . RUN python setup.py build # Build exllama kernels FROM kernel-builder as exllama-kernels-builder WORKDIR /usr/src COPY server/exllama_kernels/ . RUN python setup.py build # Build exllama v2 kernels FROM kernel-builder as exllamav2-kernels-builder WORKDIR /usr/src COPY server/exllamav2_kernels/ . RUN python setup.py build FROM base as base-copy # uninstall exist vllm in base docker image RUN pip uninstall -y vllm # Copy builds artifacts from vllm builder COPY --from=vllm-builder /usr/src/build/lib.linux-x86_64-cpython-310 /usr/local/lib/python3.10/site-packages # Copy build artifacts from custom kernels builder COPY --from=custom-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /usr/local/lib/python3.10/site-packages # Copy build artifacts from exllama kernels builder COPY --from=exllama-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /usr/local/lib/python3.10/site-packages # Copy build artifacts from exllamav2 kernels builder COPY --from=exllamav2-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /usr/local/lib/python3.10/site-packages # Install server RUN pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple COPY proto proto COPY server server COPY server/Makefile server/Makefile RUN cd server && \ make gen-server && \ pip install -r requirements_rocm.txt && \ pip install ".[accelerate, peft, outlines]" --no-cache-dir # Install benchmarker COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark # Install router COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router # Install launcher COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher #Remove default hyhal RUN rm -rf /opt/hyhal /opt/hyhal.tar.gz # AWS Sagemaker compatible image # FROM base-copy as sagemaker # COPY sagemaker-entrypoint.sh entrypoint.sh # RUN chmod +x entrypoint.sh # ENTRYPOINT ["./entrypoint.sh"] # # Final image # FROM base-copy # ENTRYPOINT ["text-generation-launcher"] # CMD ["--json-output"]