Commit f784b36a authored by Tanmay Verma, committed by GitHub

chore: Simplify the container build instructions for LLMAPI example (#87)

parent 28f3b1bb
@@ -72,24 +72,9 @@ RUN pip install "git+https://github.com/triton-inference-server/perf_analyzer.gi
 # Backend & Framework Specific Installation
 ARG FRAMEWORK="STANDARD"
-ARG TENSORRTLLM_BACKEND_REPO_TAG=
-ARG TENSORRTLLM_BACKEND_REBUILD=
-ARG TENSORRTLLM_SKIP_CLONE=
-ENV FRAMEWORK=${FRAMEWORK}
-RUN --mount=type=bind,source=./container/deps/requirements.tensorrtllm.txt,target=/tmp/requirements.txt \
-    --mount=type=bind,source=./container/deps/clone_tensorrtllm.sh,target=/tmp/clone_tensorrtllm.sh \
-    if [[ "$FRAMEWORK" == "TENSORRTLLM" ]] ; then pip install --timeout=2000 -r /tmp/requirements.txt; if [ ${TENSORRTLLM_SKIP_CLONE} -ne 1 ] ; then /tmp/clone_tensorrtllm.sh --tensorrtllm-backend-repo-tag ${TENSORRTLLM_BACKEND_REPO_TAG} --tensorrtllm-backend-rebuild ${TENSORRTLLM_BACKEND_REBUILD} --dynamo-llm-path /opt/dynamo/llm_binding ; fi ; fi
 RUN --mount=type=bind,source=./container/deps/requirements.standard.txt,target=/tmp/requirements.txt \
     if [[ "$FRAMEWORK" == "STANDARD" ]] ; then pip install --timeout=2000 -r /tmp/requirements.txt ; fi
-# Backend & Framework Specific LD_LIBRARY_PATH
-ARG TENSORRTLLM_FRAMEWORK
-ENV FRAMEWORK_LD_LIBRARY_PATH=${TENSORRTLLM_FRAMEWORK:+/opt/tritonserver/backends/tensorrtllm/}
-ENV LD_LIBRARY_PATH=${FRAMEWORK_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}
-ENV TENSORRTLLM_BACKEND_REPO_TAG=$TENSORRTLLM_BACKEND_REPO_TAG
-ENV TRTLLM_USE_MPI_KVCACHE=${TENSORRTLLM_FRAMEWORK:+"1"}
 # TODO set VLLM Version
 # ENV VLLM_VERSION
...
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
ARG BASE_IMAGE="gitlab-master.nvidia.com:5005/dl/dgx/tritonserver/tensorrt-llm/amd64"
ARG BASE_IMAGE_TAG="krish-fix-trtllm-build.23766174"
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS dev
USER root
# Install utilities
RUN apt update -y && apt install -y git wget curl nvtop tmux vim
# nats
RUN wget https://github.com/nats-io/nats-server/releases/download/v2.10.24/nats-server-v2.10.24-amd64.deb && dpkg -i nats-server-v2.10.24-amd64.deb
# etcd
ENV ETCD_VERSION="v3.5.18"
RUN wget https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-amd64.tar.gz -O /tmp/etcd.tar.gz && \
mkdir -p /usr/local/bin/etcd && \
tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1
ENV PATH=/usr/local/bin/etcd/:$PATH
# TODO: Try using uv to install tensorrtllm
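# TENSORRTLLM_PIP_WHEEL_PATH is a directory, relative to the docker build context, that contains a
# pre-built TensorRT-LLM wheel; container/build.sh forwards it via --tensorrtllm-pip-wheel-path
# (e.g. trtllm_wheel, per the README below).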
ARG TENSORRTLLM_PIP_WHEEL_PATH=""
COPY ${TENSORRTLLM_PIP_WHEEL_PATH}/*.whl /tmp/
RUN find /tmp -name "*.whl" -exec pip install {} +
# Install genai-perf for benchmarking
# TODO: Move to a tag once the genai-perf fix is released
ARG GENAI_PERF_TAG="25d0188713adc47868d6b3f22426375237a90529"
RUN pip install "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf"
# Install test dependencies
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
pip install --requirement /tmp/requirements.txt
### MISC UTILITY SETUP ###
# Finish pyright install
RUN pyright --help > /dev/null 2>&1
# Enable Git operations in the /workspace directory
RUN printf "[safe]\n directory=/workspace\n" > /root/.gitconfig
RUN ln -sf /bin/bash /bin/sh
# Rust build/dev dependencies
RUN apt-get update && \
apt-get install --no-install-recommends --yes gdb protobuf-compiler cmake libssl-dev pkg-config
RUN curl https://sh.rustup.rs -sSf | bash -s -- -y
ENV PATH="/root/.cargo/bin:${PATH}"
RUN rustup toolchain install 1.85.0-x86_64-unknown-linux-gnu
# Working directory
WORKDIR /workspace
# Copy Python wheel configuration files
COPY pyproject.toml /workspace/
COPY README.md /workspace/
COPY LICENSE /workspace/
# Build Rust runtime
COPY lib/runtime /workspace/lib/runtime
RUN cd lib/runtime && \
cargo build --release --locked && cargo doc --no-deps
# Build OpenAI HTTP Service binaries
COPY lib/llm /workspace/lib/llm
COPY examples/rust /workspace/examples/rust
RUN cd examples/rust && \
cargo build --release && \
cp target/release/http /usr/local/bin/ && \
cp target/release/llmctl /usr/local/bin/
# TODO: Build dynamo-run
# COPY applications/...
COPY deploy/dynamo/sdk /workspace/deploy/dynamo/sdk
# Generate C bindings. Note that this is required for the TRTLLM backend rebuild.
COPY lib/bindings /workspace/lib/bindings
RUN cd lib/bindings/c/ && \
cargo build --release --locked && cargo doc --no-deps
# Install uv, create virtualenv for general use, and build dynamo wheel
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
RUN mkdir /opt/dynamo && \
uv venv /opt/dynamo/venv --python 3.12 && \
source /opt/dynamo/venv/bin/activate && \
uv build --wheel --out-dir /workspace/dist && \
uv pip install /workspace/dist/dynamo*cp312*.whl && \
cd /workspace/deploy/dynamo/sdk && \
uv build --wheel --out-dir /workspace/dist && \
uv pip install /workspace/dist/dynamo_sdk*any.whl
# Package the bindings
RUN mkdir -p /opt/dynamo/bindings/wheels && \
mkdir /opt/dynamo/bindings/lib && \
cp dist/dynamo*cp312*.whl /opt/dynamo/bindings/wheels/. && \
cp lib/bindings/c/target/release/libdynamo_llm_capi.so /opt/dynamo/bindings/lib/. && \
cp -r lib/bindings/c/include /opt/dynamo/bindings/.
# Install dynamo.runtime and dynamo.llm wheels globally in container for tests that
# currently run without virtual environment activated.
# TODO: In future, we may use a virtualenv for everything and remove this.
RUN cd /opt/dynamo/bindings/wheels && \
pip install dynamo*cp312*.whl && \
pip install /workspace/dist/dynamo_sdk*any.whl
# Tell TRTLLM worker to use the Dynamo LLM C API for KV Cache Routing
ENV DYNAMO_KV_CAPI_PATH="/opt/dynamo/bindings/lib/libdynamo_llm_capi.so"
# FIXME: Copy more specific folders in for dev/debug after directory restructure
COPY . /workspace
# FIXME: May want a modification with dynamo banner on entry
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
\ No newline at end of file
@@ -57,19 +57,9 @@ STANDARD_BASE_IMAGE=nvcr.io/nvidia/tritonserver
 STANDARD_BASE_IMAGE_TAG=${STANDARD_BASE_VERSION}-py3
 TENSORRTLLM_BASE_VERSION=25.01
-TENSORRTLLM_BASE_IMAGE=nvcr.io/nvidia/tritonserver
+TENSORRTLLM_BASE_IMAGE="gitlab-master.nvidia.com:5005/dl/dgx/tritonserver/tensorrt-llm/amd64"
-TENSORRTLLM_BASE_IMAGE_TAG=${TENSORRTLLM_BASE_VERSION}-trtllm-python-py3
+TENSORRTLLM_BASE_IMAGE_TAG=krish-fix-trtllm-build.23766174
-# IMPORTANT NOTE: Ensure the repo tag complies with the TRTLLM backend version
+TENSORRTLLM_PIP_WHEEL_PATH=""
-# used in the base image above.
-TENSORRTLLM_BACKEND_REPO_TAG=triton-llm/v0.17.0
-# Set this as 1 to rebuild and replace trtllm backend bits in the container.
-# This will allow building Dynamo container image with custom
-# trt-llm backend repo branch.
-TENSORRTLLM_BACKEND_REBUILD=0
-# Set this as 1 to skip cloning the trt-llm backend repo. If cloning is skipped, trt-llm
-# backend repo tag and rebuild flag will be ignored. Use this option if you are using
-# trtllm llmapi worker.
-TENSORRTLLM_SKIP_CLONE=0
 VLLM_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
 VLLM_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"

@@ -100,25 +90,9 @@ get_options() {
                 missing_requirement $1
             fi
             ;;
-        --tensorrtllm-backend-repo-tag)
+        --tensorrtllm-pip-wheel-path)
             if [ "$2" ]; then
-                TRTLLM_BACKEND_COMMIT=$2
+                TENSORRTLLM_PIP_WHEEL_PATH=$2
-                shift
-            else
-                missing_requirement $1
-            fi
-            ;;
-        --tensorrtllm-backend-rebuild)
-            if [ "$2" ]; then
-                TRTLLM_BACKEND_REBUILD=$2
-                shift
-            else
-                missing_requirement $1
-            fi
-            ;;
-        --skip-clone-tensorrtllm)
-            if [ "$2" ]; then
-                TENSORRTLLM_SKIP_CLONE=$2
                 shift
             else
                 missing_requirement $1

@@ -270,9 +244,7 @@ show_image_options() {
     echo " Base: '${BASE_IMAGE}'"
     echo " Base_Image_Tag: '${BASE_IMAGE_TAG}'"
     if [[ $FRAMEWORK == "TENSORRTLLM" ]]; then
-        echo " Tensorrtllm Backend Repo Tag: '${TENSORRTLLM_BACKEND_REPO_TAG}'"
+        echo " Tensorrtllm_Pip_Wheel_Path: '${TENSORRTLLM_PIP_WHEEL_PATH}'"
-        echo " Tensorrtllm Backend Rebuild: '${TENSORRTLLM_BACKEND_REBUILD}'"
-        echo " Tensorrtllm Skip Clone: '${TENSORRTLLM_SKIP_CLONE}'"
     fi
     echo " Build Context: '${BUILD_CONTEXT}'"
     echo " Build Arguments: '${BUILD_ARGS}'"

@@ -286,9 +258,7 @@ show_help() {
     echo " [--base-imge-tag base image tag]"
     echo " [--platform platform for docker build"
     echo " [--framework framework one of ${!FRAMEWORKS[@]}]"
-    echo " [--tensorrtllm-backend-repo-tag commit or tag]"
+    echo " [--tensorrtllm-pip-wheel-path path to tensorrtllm pip wheel]"
-    echo " [--tensorrtllm-backend-rebuild whether or not to rebuild the backend]"
-    echo " [--skip-clone-tensorrtllm whether or not to skip cloning the trt-llm backend repo]"
     echo " [--build-arg additional build args to pass to docker build]"
     echo " [--cache-from cache location to start from]"
     echo " [--cache-to location where to cache the build output]"

@@ -316,6 +286,8 @@ if [[ $FRAMEWORK == "VLLM" ]]; then
     DOCKERFILE=${SOURCE_DIR}/Dockerfile.vllm
 elif [[ $FRAMEWORK == "VLLM_NIXL" ]]; then
     DOCKERFILE=${SOURCE_DIR}/Dockerfile.vllm_nixl
+elif [[ $FRAMEWORK == "TENSORRTLLM" ]]; then
+    DOCKERFILE=${SOURCE_DIR}/Dockerfile.tensorrt_llm
 fi
 # BUILD DEV IMAGE

@@ -330,10 +302,12 @@ if [ ! -z ${GITLAB_TOKEN} ]; then
     BUILD_ARGS+=" --build-arg GITLAB_TOKEN=${GITLAB_TOKEN} "
 fi
-if [[ $FRAMEWORK == "TENSORRTLLM" ]] && [ ! -z ${TENSORRTLLM_BACKEND_REPO_TAG} ]; then
+if [[ $FRAMEWORK == "TENSORRTLLM" ]]; then
-    BUILD_ARGS+=" --build-arg TENSORRTLLM_BACKEND_REPO_TAG=${TENSORRTLLM_BACKEND_REPO_TAG} "
+    if [ ! -z ${TENSORRTLLM_PIP_WHEEL_PATH} ]; then
-    BUILD_ARGS+=" --build-arg TENSORRTLLM_BACKEND_REBUILD=${TENSORRTLLM_BACKEND_REBUILD} "
+        BUILD_ARGS+=" --build-arg TENSORRTLLM_PIP_WHEEL_PATH=${TENSORRTLLM_PIP_WHEEL_PATH} "
-    BUILD_ARGS+=" --build-arg TENSORRTLLM_SKIP_CLONE=${TENSORRTLLM_SKIP_CLONE} "
+    else
+        error "ERROR: --tensorrtllm-pip-wheel-path is not provided"
+    fi
 fi
 if [ ! -z ${HF_TOKEN} ]; then
...
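With this change, the build script simply threads the wheel directory through to the new Dockerfile as a docker build argument. A rough equivalent of what it assembles is sketched below (the `-f` path, image tag, and build context are illustrative assumptions, not the script's exact invocation):

```bash
# Roughly what ./container/build.sh --framework TENSORRTLLM --tensorrtllm-pip-wheel-path trtllm_wheel
# ends up passing to docker (run from the repository root; the -t tag is a placeholder)
docker build -f container/Dockerfile.tensorrt_llm \
    --build-arg TENSORRTLLM_PIP_WHEEL_PATH=trtllm_wheel \
    -t dynamo:tensorrtllm-dev .
```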
#!/bin/bash -e
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
TENSORRTLLM_BACKEND_REPO_TAG=
TENSORRTLLM_BACKEND_REBUILD=
DYNAMO_LLM_PATH=
GIT_TOKEN=
GIT_REPO=
get_options() {
while :; do
case $1 in
-h | -\? | --help)
show_help
exit
;;
--tensorrtllm-backend-repo-tag)
if [ "$2" ]; then
TENSORRTLLM_BACKEND_REPO_TAG=$2
shift
else
missing_requirement $1
fi
;;
--tensorrtllm-backend-rebuild)
if [ "$2" ]; then
TENSORRTLLM_BACKEND_REBUILD=$2
shift
else
missing_requirement $1
fi
;;
--dynamo-llm-path)
if [ "$2" ]; then
DYNAMO_LLM_PATH=$2
shift
else
missing_requirement $1
fi
;;
--git-token)
if [ "$2" ]; then
GIT_TOKEN=$2
shift
else
missing_requirement $1
fi
;;
--git-repo)
if [ "$2" ]; then
GIT_REPO=$2
shift
else
missing_requirement $1
fi
;;
-?*)
error 'ERROR: Unknown option: ' $1
;;
?*)
error 'ERROR: Unknown option: ' $1
;;
*)
break
;;
esac
shift
done
}
show_options() {
echo ""
echo "Getting TENSORRTLLM Backend Repo"
echo ""
echo " Tensorrtllm Backend Repo Tag: '${TENSORRTLLM_BACKEND_REPO_TAG}'"
echo " Tensorrtllm Backend Rebuild: '${TENSORRTLLM_BACKEND_REBUILD}'"
echo ""
}
show_help() {
echo "usage: clone_tensorrtllm.sh"
echo " [--tensorrtllm-backend-repo-tag commit]"
echo " [--tensorrtllm-backend-rebuild whether to rebuild backend]"
echo " [--git-token git-token]"
echo " [--git-repo git-repo]"
exit 0
}
missing_requirement() {
error "ERROR: $1 requires an argument."
}
error() {
printf '%s %s\n' "$1" "$2" >&2
exit 1
}
get_options "$@"
if [ -z ${GIT_REPO} ]; then
GIT_REPO="github.com/triton-inference-server/tensorrtllm_backend"
fi
if [ ! -z ${GIT_TOKEN} ]; then
GIT_REPO="https://oauth2:${GIT_TOKEN}@${GIT_REPO}"
else
GIT_REPO="https://${GIT_REPO}"
fi
show_options
git clone ${GIT_REPO}
cd tensorrtllm_backend
git checkout ${TENSORRTLLM_BACKEND_REPO_TAG}
git submodule update --init --recursive
git lfs install
git lfs pull
if [ ! -z ${TENSORRTLLM_BACKEND_REBUILD} ]; then
# Install cmake
apt update -q=2 \
&& apt install -y gpg wget \
&& wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null \
&& . /etc/os-release \
&& echo "deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ $UBUNTU_CODENAME main" | tee /etc/apt/sources.list.d/kitware.list >/dev/null \
&& apt-get update -q=2 \
&& apt-get install -y --no-install-recommends cmake=3.28.3* cmake-data=3.28.3* \
&& cmake --version
# Install rapidjson
apt install -y rapidjson-dev
# Build the backend
(cd inflight_batcher_llm/src \
&& cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install -DUSE_CXX11_ABI=1 -DDYNAMO_LLM_PATH=$DYNAMO_LLM_PATH .. \
&& make install \
&& cp libtriton_tensorrtllm.so /opt/tritonserver/backends/tensorrtllm/ \
&& cp trtllmExecutorWorker /opt/tritonserver/backends/tensorrtllm/ \
)
fi
cd ..
mv tensorrtllm_backend /
@@ -41,7 +41,10 @@ Start required services (etcd and NATS):
+TODO: Remove the internal references below.
-- Build TRT-LLM wheel using latest tensorrt_llm main
+### Build the Dynamo container with latest TRT-LLM
+#### Step 1: Build TRT-LLM wheel using latest tensorrt_llm main
 ```
 git clone https://github.com/NVIDIA/TensorRT-LLM.git

@@ -58,26 +61,24 @@ python3 scripts/build_wheel.py --clean --trt_root /usr/local/tensorrt -a native
 cp build/tensorrt_llm-*.whl /home
 ```
-- Build the Dynamo container
+#### Step 2: Copy the TRT-LLM wheel to the dynamo repository
 ```bash
-# Build image
+cp /home/tensorrt_llm-*.whl /<path-to-repo>/dynamo/trtllm_wheel/
-./container/build.sh --base-image gitlab-master.nvidia.com:5005/dl/dgx/tritonserver/tensorrt-llm/amd64 --base-image-tag krish-fix-trtllm-build.23766174
 ```
-Alternatively, you can build with latest tensorrt_llm pipeline like below:
+#### Step 3: Build the container
 ```bash
 # Build image
-./container/build.sh --framework TENSORRTLLM --skip-clone-tensorrtllm 1 --base-image urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/release --base-image-tag main
+./container/build.sh --framework TENSORRTLLM --tensorrtllm-pip-wheel-path trtllm_wheel
 ```
+**Note:** If you are using the latest tensorrt_llm image, you do not need to install the TRT-LLM wheel.
+We need to copy the TRT-LLM wheel into the repository and point the build script to that path within the repository so that it can be picked up by the docker build context.
 ## Launching the Environment
 ```
 # Run image interactively from with the Dynamo root directory.
-./container/run.sh --framework TENSORRTLLM -it -v /home/:/home/
+./container/run.sh --framework TENSORRTLLM -it
-# Install the TRT-LLM wheel. No need to do this if you are using the latest tensorrt_llm image.
-pip install /home/tensorrt_llm-*.whl
 ```
 ## Deployment Options
...
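Taken together, the simplified flow from the updated README boils down to the following sequence (a minimal sketch; `/path/to/dynamo` and the wheel location are placeholders, and Step 1 is abbreviated):

```bash
# Step 1: build the TensorRT-LLM wheel from the latest main (abbreviated; see the README diff above)
git clone https://github.com/NVIDIA/TensorRT-LLM.git
# ... scripts/build_wheel.py produces build/tensorrt_llm-*.whl, copied to /home ...

# Step 2: copy the wheel into the dynamo repo so it lands inside the docker build context
cp /home/tensorrt_llm-*.whl /path/to/dynamo/trtllm_wheel/

# Step 3: build the Dynamo container, pointing the build script at the wheel directory
cd /path/to/dynamo
./container/build.sh --framework TENSORRTLLM --tensorrtllm-pip-wheel-path trtllm_wheel

# Launch the environment interactively (the wheel is already installed in the image)
./container/run.sh --framework TENSORRTLLM -it
```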