# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Base image shared by every stage in this file. Both values can be
# overridden at build time with --build-arg.
ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"

FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS dev

USER root

# Install utilities
# apt-get is the script-stable front end; the package lists are removed in
# the same layer so they do not add to the image size.
RUN apt-get update -y && \
    apt-get install -y \
        curl \
        git \
        nvtop \
        tmux \
        vim \
        wget && \
    rm -rf /var/lib/apt/lists/*

# nats
# The .deb is fetched into /tmp and deleted in the same layer so the
# installer archive does not persist in the image.
RUN wget https://github.com/nats-io/nats-server/releases/download/v2.10.24/nats-server-v2.10.24-amd64.deb -O /tmp/nats-server.deb && \
    dpkg -i /tmp/nats-server.deb && \
    rm -f /tmp/nats-server.deb

# etcd
# The release tarball is unpacked into /usr/local/bin/etcd (a directory that
# is then put on PATH) and the tarball is deleted in the same layer.
ENV ETCD_VERSION="v3.5.18"
RUN wget https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-amd64.tar.gz -O /tmp/etcd.tar.gz && \
    mkdir -p /usr/local/bin/etcd && \
    tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1 && \
    rm -f /tmp/etcd.tar.gz
ENV PATH=/usr/local/bin/etcd/:$PATH

### VIRTUAL ENVIRONMENT SETUP ###

# Install uv and create virtualenv
# NOTE(review): the uv image is referenced by the floating `latest` tag, so
# the uv version may differ between builds - consider pinning a release tag.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
RUN mkdir /opt/triton && \
    uv venv /opt/triton/venv --python 3.12

# Activate virtual environment
ENV VIRTUAL_ENV=/opt/triton/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

# Install patched vllm - keep this early in Dockerfile to avoid
# rebuilds from unrelated source code changes
ARG VLLM_REF="v0.7.2"
ARG VLLM_PATCH="vllm_${VLLM_REF}-triton-kv-disagg-patch.patch"
# container/deps is bind-mounted for this step only, so the install script
# and patch file are not copied into an image layer by this instruction.
RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
    bash /tmp/deps/vllm/install.sh --patch /tmp/deps/vllm/${VLLM_PATCH} --ref ${VLLM_REF} --install-cmd "uv pip install --editable" --use-precompiled --installation-dir /opt/vllm

# Install genai-perf for benchmarking
ARG GENAI_PERF_TAG="r25.01"
RUN uv pip install "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf"

# Install test dependencies
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
    uv pip install --requirement /tmp/requirements.txt

### MISC UTILITY SETUP ###

# Finish pyright install
RUN pyright --help > /dev/null 2>&1

# Enable Git operations in the /workspace directory
RUN printf "[safe]\n directory=/workspace\n" > /root/.gitconfig

# Make /bin/sh resolve to bash for the remaining shell-form instructions
RUN ln -sf /bin/bash /bin/sh

### BUILDS ###

# Rust build/dev dependencies
# The rustup installer is downloaded to a file before it is executed instead
# of being piped straight into bash: with a pipe (and no pipefail) a failed
# download would still let this step succeed and the build would only break
# later when cargo is missing. The installer and the apt package lists are
# removed in the same layer.
RUN apt-get update -y && \
    apt-get install -y \
        build-essential \
        protobuf-compiler && \
    rm -rf /var/lib/apt/lists/* && \
    curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs -o /tmp/rustup-init.sh && \
    bash /tmp/rustup-init.sh -y && \
    rm -f /tmp/rustup-init.sh
ENV PATH="/root/.cargo/bin:${PATH}"

# Working directory
WORKDIR /workspace

COPY runtime/rust /workspace/runtime/rust
RUN cd runtime/rust && \
    cargo build --release --locked && cargo doc --no-deps

# Build OpenAI HTTP Service binaries
COPY llm/rust /workspace/llm/rust
COPY examples/rust /workspace/examples/rust
# NOTE(review): unlike the other cargo builds in this file, this one does not
# pass --locked - confirm whether examples/rust ships a Cargo.lock.
RUN cd examples/rust && \
    cargo build --release && \
    cp target/release/http /usr/local/bin/ && \
    cp target/release/llmctl /usr/local/bin/

# TODO: Build tio
# COPY applications/...

# Generate C bindings for kv cache routing in vLLM
# NOTE(review): llm/rust is also copied before the examples build above; this
# second COPY looks redundant but is kept in case that build writes into the
# source tree - confirm before removing.
COPY llm/rust /workspace/llm/rust
RUN cd llm/rust/ && \
    cargo build --release --locked && cargo doc --no-deps

# Build triton_distributed_rs wheel
COPY python-wheel /workspace/python-wheel
RUN cd python-wheel && \
    uv build && \
    uv pip install dist/triton_distributed_rs*cp312*.whl

# Package the bindings
# Wheel, shared library and headers are gathered under /opt/triton/llm_binding
# in a single layer.
RUN mkdir -p /opt/triton/llm_binding/wheels /opt/triton/llm_binding/lib && \
    cp python-wheel/dist/triton_distributed_rs*cp312*.whl /opt/triton/llm_binding/wheels/. && \
    cp llm/rust/target/release/libtriton_llm_capi.so /opt/triton/llm_binding/lib/. && \
    cp -r llm/rust/libtriton-llm/include /opt/triton/llm_binding/.

# Tell vllm to use the Triton LLM C API for KV Cache Routing
ENV VLLM_KV_CAPI_PATH="/opt/triton/llm_binding/lib/libtriton_llm_capi.so"

# FIXME: Copy more specific folders in for dev/debug after directory restructure
COPY . /workspace

# FIXME: May want a modification with triton-distributed banner on entry
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]

CMD []

### Lean Runtime Image Stage ###

# FIXME: Separate build and runtime images
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS runtime

USER root

# Install tools for interactive convenience
# Package lists are removed in the same layer to keep the runtime image small.
RUN apt-get update -y && \
    apt-get install -y \
        curl \
        tmux \
        vim && \
    rm -rf /var/lib/apt/lists/* && \
    echo "set -g mouse on" >> /root/.tmux.conf

# Set environment variables
ENV VIRTUAL_ENV=/opt/triton/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
ENV RAPIDS_LIBUCX_PREFER_SYSTEM_LIBRARY=true
ENV VLLM_KV_CAPI_PATH="/opt/triton/llm_binding/lib/libtriton_llm_capi.so"

# Copy binaries
# Only the individual executables are taken from the dev stage; note that
# /usr/local/bin/etcd is a directory in dev but a single binary here.
COPY --from=dev /usr/local/bin/http /usr/local/bin/http
COPY --from=dev /usr/local/bin/llmctl /usr/local/bin/llmctl
COPY --from=dev /usr/local/bin/etcd/etcd /usr/local/bin/etcd
COPY --from=dev /usr/bin/nats-server /usr/local/bin/nats-server
COPY --from=dev /bin/uv /usr/local/bin/uv
COPY --from=dev /bin/uvx /usr/local/bin/uvx

# Copy venv with installed packages
# A Python 3.12 interpreter is installed with uv before the venv is copied in.
# /opt/vllm is copied as well because vllm was installed with --editable in dev.
RUN uv python install 3.12
COPY --from=dev /opt/vllm /opt/vllm
COPY --from=dev ${VIRTUAL_ENV} ${VIRTUAL_ENV}

# Copy minimal set of files for testing. May consider separate stage for testing
# if test dependencies start to negatively impact deployment environment/size.
COPY pyproject.toml /workspace/pyproject.toml
COPY container/deps/vllm /workspace/container/deps/vllm
COPY python-wheel/python /workspace/python-wheel/python

# Add library for KV routing
COPY --from=dev ${VLLM_KV_CAPI_PATH} ${VLLM_KV_CAPI_PATH}

# Copy minimal set of files for deployment/examples
# FIXME: Use a more consolidated path after directory restructure
COPY examples/python_rs/llm/vllm /workspace/examples/python_rs/llm/vllm

WORKDIR /workspace

# FIXME: May want a modification with triton-distributed banner on entry
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]

CMD []