Commit f784b36a authored by Tanmay Verma, committed by GitHub

chore: Simplify the container build instructions for LLMAPI example (#87)

parent 28f3b1bb
@@ -72,24 +72,9 @@ RUN pip install "git+https://github.com/triton-inference-server/perf_analyzer.gi
 # Backend & Framework Specific Installation
 ARG FRAMEWORK="STANDARD"
-ARG TENSORRTLLM_BACKEND_REPO_TAG=
-ARG TENSORRTLLM_BACKEND_REBUILD=
-ARG TENSORRTLLM_SKIP_CLONE=
-ENV FRAMEWORK=${FRAMEWORK}
-RUN --mount=type=bind,source=./container/deps/requirements.tensorrtllm.txt,target=/tmp/requirements.txt \
-    --mount=type=bind,source=./container/deps/clone_tensorrtllm.sh,target=/tmp/clone_tensorrtllm.sh \
-    if [[ "$FRAMEWORK" == "TENSORRTLLM" ]] ; then pip install --timeout=2000 -r /tmp/requirements.txt; if [ ${TENSORRTLLM_SKIP_CLONE} -ne 1 ] ; then /tmp/clone_tensorrtllm.sh --tensorrtllm-backend-repo-tag ${TENSORRTLLM_BACKEND_REPO_TAG} --tensorrtllm-backend-rebuild ${TENSORRTLLM_BACKEND_REBUILD} --dynamo-llm-path /opt/dynamo/llm_binding ; fi ; fi
 RUN --mount=type=bind,source=./container/deps/requirements.standard.txt,target=/tmp/requirements.txt \
     if [[ "$FRAMEWORK" == "STANDARD" ]] ; then pip install --timeout=2000 -r /tmp/requirements.txt ; fi
-# Backend & Framework Specific LD_LIBRARY_PATH
-ARG TENSORRTLLM_FRAMEWORK
-ENV FRAMEWORK_LD_LIBRARY_PATH=${TENSORRTLLM_FRAMEWORK:+/opt/tritonserver/backends/tensorrtllm/}
-ENV LD_LIBRARY_PATH=${FRAMEWORK_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}
-ENV TENSORRTLLM_BACKEND_REPO_TAG=$TENSORRTLLM_BACKEND_REPO_TAG
-ENV TRTLLM_USE_MPI_KVCACHE=${TENSORRTLLM_FRAMEWORK:+"1"}
 # TODO set VLLM Version
 # ENV VLLM_VERSION
...
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
ARG BASE_IMAGE="gitlab-master.nvidia.com:5005/dl/dgx/tritonserver/tensorrt-llm/amd64"
ARG BASE_IMAGE_TAG="krish-fix-trtllm-build.23766174"
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS dev
USER root
# Install utilities
RUN apt update -y && apt install -y git wget curl nvtop tmux vim
# nats
RUN wget https://github.com/nats-io/nats-server/releases/download/v2.10.24/nats-server-v2.10.24-amd64.deb && dpkg -i nats-server-v2.10.24-amd64.deb
# etcd
ENV ETCD_VERSION="v3.5.18"
RUN wget https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-amd64.tar.gz -O /tmp/etcd.tar.gz && \
mkdir -p /usr/local/bin/etcd && \
tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1
ENV PATH=/usr/local/bin/etcd/:$PATH
# TODO: Try using uv to install tensorrtllm
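# TENSORRTLLM_PIP_WHEEL_PATH is a directory, relative to the docker build context, that contains a
# pre-built TensorRT-LLM wheel; container/build.sh forwards it via --tensorrtllm-pip-wheel-path
# (e.g. trtllm_wheel, per the README below).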
ARG TENSORRTLLM_PIP_WHEEL_PATH=""
COPY ${TENSORRTLLM_PIP_WHEEL_PATH}/*.whl /tmp/
RUN find /tmp -name "*.whl" -exec pip install {} +
# Install genai-perf for benchmarking
# TODO: Move to a tag once the genai-perf fix is released
ARG GENAI_PERF_TAG="25d0188713adc47868d6b3f22426375237a90529"
RUN pip install "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf"
# Install test dependencies
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
pip install --requirement /tmp/requirements.txt
### MISC UTILITY SETUP ###
# Finish pyright install
RUN pyright --help > /dev/null 2>&1
# Enable Git operations in the /workspace directory
RUN printf "[safe]\n directory=/workspace\n" > /root/.gitconfig
RUN ln -sf /bin/bash /bin/sh
# Rust build/dev dependencies
RUN apt-get update && \
apt-get install --no-install-recommends --yes gdb protobuf-compiler cmake libssl-dev pkg-config
RUN curl https://sh.rustup.rs -sSf | bash -s -- -y
ENV PATH="/root/.cargo/bin:${PATH}"
RUN rustup toolchain install 1.85.0-x86_64-unknown-linux-gnu
# Working directory
WORKDIR /workspace
# Copy Python wheel configuration files
COPY pyproject.toml /workspace/
COPY README.md /workspace/
COPY LICENSE /workspace/
# Build Rust runtime
COPY lib/runtime /workspace/lib/runtime
RUN cd lib/runtime && \
cargo build --release --locked && cargo doc --no-deps
# Build OpenAI HTTP Service binaries
COPY lib/llm /workspace/lib/llm
COPY examples/rust /workspace/examples/rust
RUN cd examples/rust && \
cargo build --release && \
cp target/release/http /usr/local/bin/ && \
cp target/release/llmctl /usr/local/bin/
# TODO: Build dynamo-run
# COPY applications/...
COPY deploy/dynamo/sdk /workspace/deploy/dynamo/sdk
# Generate C bindings. Note that this is required for the TRTLLM backend rebuild.
COPY lib/bindings /workspace/lib/bindings
RUN cd lib/bindings/c/ && \
cargo build --release --locked && cargo doc --no-deps
# Install uv, create virtualenv for general use, and build dynamo wheel
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
RUN mkdir /opt/dynamo && \
uv venv /opt/dynamo/venv --python 3.12 && \
source /opt/dynamo/venv/bin/activate && \
uv build --wheel --out-dir /workspace/dist && \
uv pip install /workspace/dist/dynamo*cp312*.whl && \
cd /workspace/deploy/dynamo/sdk && \
uv build --wheel --out-dir /workspace/dist && \
uv pip install /workspace/dist/dynamo_sdk*any.whl
# Package the bindings
RUN mkdir -p /opt/dynamo/bindings/wheels && \
mkdir /opt/dynamo/bindings/lib && \
cp dist/dynamo*cp312*.whl /opt/dynamo/bindings/wheels/. && \
cp lib/bindings/c/target/release/libdynamo_llm_capi.so /opt/dynamo/bindings/lib/. && \
cp -r lib/bindings/c/include /opt/dynamo/bindings/.
# Install dynamo.runtime and dynamo.llm wheels globally in container for tests that
# currently run without virtual environment activated.
# TODO: In future, we may use a virtualenv for everything and remove this.
RUN cd /opt/dynamo/bindings/wheels && \
pip install dynamo*cp312*.whl && \
pip install /workspace/dist/dynamo_sdk*any.whl
# Tell TRTLLM worker to use the Dynamo LLM C API for KV Cache Routing
ENV DYNAMO_KV_CAPI_PATH="/opt/dynamo/bindings/lib/libdynamo_llm_capi.so"
# FIXME: Copy more specific folders in for dev/debug after directory restructure
COPY . /workspace
# FIXME: May want a modification with dynamo banner on entry
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
\ No newline at end of file
@@ -57,19 +57,9 @@ STANDARD_BASE_IMAGE=nvcr.io/nvidia/tritonserver
 STANDARD_BASE_IMAGE_TAG=${STANDARD_BASE_VERSION}-py3
 TENSORRTLLM_BASE_VERSION=25.01
-TENSORRTLLM_BASE_IMAGE=nvcr.io/nvidia/tritonserver
+TENSORRTLLM_BASE_IMAGE="gitlab-master.nvidia.com:5005/dl/dgx/tritonserver/tensorrt-llm/amd64"
-TENSORRTLLM_BASE_IMAGE_TAG=${TENSORRTLLM_BASE_VERSION}-trtllm-python-py3
+TENSORRTLLM_BASE_IMAGE_TAG=krish-fix-trtllm-build.23766174
-# IMPORTANT NOTE: Ensure the repo tag complies with the TRTLLM backend version
+TENSORRTLLM_PIP_WHEEL_PATH=""
-# used in the base image above.
-TENSORRTLLM_BACKEND_REPO_TAG=triton-llm/v0.17.0
-# Set this as 1 to rebuild and replace trtllm backend bits in the container.
-# This will allow building Dynamo container image with custom
-# trt-llm backend repo branch.
-TENSORRTLLM_BACKEND_REBUILD=0
-# Set this as 1 to skip cloning the trt-llm backend repo. If cloning is skipped, trt-llm
-# backend repo tag and rebuild flag will be ignored. Use this option if you are using
-# trtllm llmapi worker.
-TENSORRTLLM_SKIP_CLONE=0
 VLLM_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
 VLLM_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"

@@ -100,25 +90,9 @@ get_options() {
                 missing_requirement $1
             fi
             ;;
-        --tensorrtllm-backend-repo-tag)
+        --tensorrtllm-pip-wheel-path)
             if [ "$2" ]; then
-                TRTLLM_BACKEND_COMMIT=$2
+                TENSORRTLLM_PIP_WHEEL_PATH=$2
-                shift
-            else
-                missing_requirement $1
-            fi
-            ;;
-        --tensorrtllm-backend-rebuild)
-            if [ "$2" ]; then
-                TRTLLM_BACKEND_REBUILD=$2
-                shift
-            else
-                missing_requirement $1
-            fi
-            ;;
-        --skip-clone-tensorrtllm)
-            if [ "$2" ]; then
-                TENSORRTLLM_SKIP_CLONE=$2
                 shift
             else
                 missing_requirement $1

@@ -270,9 +244,7 @@ show_image_options() {
     echo " Base: '${BASE_IMAGE}'"
     echo " Base_Image_Tag: '${BASE_IMAGE_TAG}'"
     if [[ $FRAMEWORK == "TENSORRTLLM" ]]; then
-        echo " Tensorrtllm Backend Repo Tag: '${TENSORRTLLM_BACKEND_REPO_TAG}'"
+        echo " Tensorrtllm_Pip_Wheel_Path: '${TENSORRTLLM_PIP_WHEEL_PATH}'"
-        echo " Tensorrtllm Backend Rebuild: '${TENSORRTLLM_BACKEND_REBUILD}'"
-        echo " Tensorrtllm Skip Clone: '${TENSORRTLLM_SKIP_CLONE}'"
     fi
     echo " Build Context: '${BUILD_CONTEXT}'"
     echo " Build Arguments: '${BUILD_ARGS}'"

@@ -286,9 +258,7 @@ show_help() {
     echo " [--base-imge-tag base image tag]"
     echo " [--platform platform for docker build"
     echo " [--framework framework one of ${!FRAMEWORKS[@]}]"
-    echo " [--tensorrtllm-backend-repo-tag commit or tag]"
+    echo " [--tensorrtllm-pip-wheel-path path to tensorrtllm pip wheel]"
-    echo " [--tensorrtllm-backend-rebuild whether or not to rebuild the backend]"
-    echo " [--skip-clone-tensorrtllm whether or not to skip cloning the trt-llm backend repo]"
     echo " [--build-arg additional build args to pass to docker build]"
     echo " [--cache-from cache location to start from]"
     echo " [--cache-to location where to cache the build output]"

@@ -316,6 +286,8 @@ if [[ $FRAMEWORK == "VLLM" ]]; then
     DOCKERFILE=${SOURCE_DIR}/Dockerfile.vllm
 elif [[ $FRAMEWORK == "VLLM_NIXL" ]]; then
     DOCKERFILE=${SOURCE_DIR}/Dockerfile.vllm_nixl
+elif [[ $FRAMEWORK == "TENSORRTLLM" ]]; then
+    DOCKERFILE=${SOURCE_DIR}/Dockerfile.tensorrt_llm
 fi
 # BUILD DEV IMAGE

@@ -330,10 +302,12 @@ if [ ! -z ${GITLAB_TOKEN} ]; then
     BUILD_ARGS+=" --build-arg GITLAB_TOKEN=${GITLAB_TOKEN} "
 fi
-if [[ $FRAMEWORK == "TENSORRTLLM" ]] && [ ! -z ${TENSORRTLLM_BACKEND_REPO_TAG} ]; then
+if [[ $FRAMEWORK == "TENSORRTLLM" ]]; then
-    BUILD_ARGS+=" --build-arg TENSORRTLLM_BACKEND_REPO_TAG=${TENSORRTLLM_BACKEND_REPO_TAG} "
+    if [ ! -z ${TENSORRTLLM_PIP_WHEEL_PATH} ]; then
-    BUILD_ARGS+=" --build-arg TENSORRTLLM_BACKEND_REBUILD=${TENSORRTLLM_BACKEND_REBUILD} "
+        BUILD_ARGS+=" --build-arg TENSORRTLLM_PIP_WHEEL_PATH=${TENSORRTLLM_PIP_WHEEL_PATH} "
-    BUILD_ARGS+=" --build-arg TENSORRTLLM_SKIP_CLONE=${TENSORRTLLM_SKIP_CLONE} "
+    else
+        error "ERROR: --tensorrtllm-pip-wheel-path is not provided"
+    fi
 fi
 if [ ! -z ${HF_TOKEN} ]; then
...
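With this change, the build script simply threads the wheel directory through to the new Dockerfile as a docker build argument. A rough equivalent of what it assembles is sketched below (the `-f` path, image tag, and build context are illustrative assumptions, not the script's exact invocation):

```bash
# Roughly what ./container/build.sh --framework TENSORRTLLM --tensorrtllm-pip-wheel-path trtllm_wheel
# ends up passing to docker (run from the repository root; the -t tag is a placeholder)
docker build -f container/Dockerfile.tensorrt_llm \
    --build-arg TENSORRTLLM_PIP_WHEEL_PATH=trtllm_wheel \
    -t dynamo:tensorrtllm-dev .
```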
#!/bin/bash -e
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
TENSORRTLLM_BACKEND_REPO_TAG=
TENSORRTLLM_BACKEND_REBUILD=
DYNAMO_LLM_PATH=
GIT_TOKEN=
GIT_REPO=
get_options() {
while :; do
case $1 in
-h | -\? | --help)
show_help
exit
;;
--tensorrtllm-backend-repo-tag)
if [ "$2" ]; then
TENSORRTLLM_BACKEND_REPO_TAG=$2
shift
else
missing_requirement $1
fi
;;
--tensorrtllm-backend-rebuild)
if [ "$2" ]; then
TENSORRTLLM_BACKEND_REBUILD=$2
shift
else
missing_requirement $1
fi
;;
--dynamo-llm-path)
if [ "$2" ]; then
DYNAMO_LLM_PATH=$2
shift
else
missing_requirement $1
fi
;;
--git-token)
if [ "$2" ]; then
GIT_TOKEN=$2
shift
else
missing_requirement $1
fi
;;
--git-repo)
if [ "$2" ]; then
GIT_REPO=$2
shift
else
missing_requirement $1
fi
;;
-?*)
error 'ERROR: Unknown option: ' $1
;;
?*)
error 'ERROR: Unknown option: ' $1
;;
*)
break
;;
esac
shift
done
}
show_options() {
echo ""
echo "Getting TENSORRTLLM Backend Repo"
echo ""
echo " Tensorrtllm Backend Repo Tag: '${TENSORRTLLM_BACKEND_REPO_TAG}'"
echo " Tensorrtllm Backend Rebuild: '${TENSORRTLLM_BACKEND_REBUILD}'"
echo ""
}
show_help() {
echo "usage: clone_tensorrtllm.sh"
echo " [--tensorrtllm-backend-repo-tag commit]"
echo " [--tensorrtllm-backend-rebuild whether to rebuild backend]"
echo " [--git-token git-token]"
echo " [--git-repo git-repo]"
exit 0
}
missing_requirement() {
error "ERROR: $1 requires an argument."
}
error() {
printf '%s %s\n' "$1" "$2" >&2
exit 1
}
get_options "$@"
if [ -z ${GIT_REPO} ]; then
GIT_REPO="github.com/triton-inference-server/tensorrtllm_backend"
fi
if [ ! -z ${GIT_TOKEN} ]; then
GIT_REPO="https://oauth2:${GIT_TOKEN}@${GIT_REPO}"
else
GIT_REPO="https://${GIT_REPO}"
fi
show_options
git clone ${GIT_REPO}
cd tensorrtllm_backend
git checkout ${TENSORRTLLM_BACKEND_REPO_TAG}
git submodule update --init --recursive
git lfs install
git lfs pull
if [ ! -z ${TENSORRTLLM_BACKEND_REBUILD} ]; then
# Install cmake
apt update -q=2 \
&& apt install -y gpg wget \
&& wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null \
&& . /etc/os-release \
&& echo "deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ $UBUNTU_CODENAME main" | tee /etc/apt/sources.list.d/kitware.list >/dev/null \
&& apt-get update -q=2 \
&& apt-get install -y --no-install-recommends cmake=3.28.3* cmake-data=3.28.3* \
&& cmake --version
# Install rapidjson
apt install -y rapidjson-dev
# Build the backend
(cd inflight_batcher_llm/src \
&& cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install -DUSE_CXX11_ABI=1 -DDYNAMO_LLM_PATH=$DYNAMO_LLM_PATH .. \
&& make install \
&& cp libtriton_tensorrtllm.so /opt/tritonserver/backends/tensorrtllm/ \
&& cp trtllmExecutorWorker /opt/tritonserver/backends/tensorrtllm/ \
)
fi
cd ..
mv tensorrtllm_backend /
@@ -41,7 +41,10 @@ Start required services (etcd and NATS):
+TODO: Remove the internal references below.
-- Build TRT-LLM wheel using latest tensorrt_llm main
+### Build the Dynamo container with latest TRT-LLM
+#### Step 1: Build TRT-LLM wheel using latest tensorrt_llm main
 ```
 git clone https://github.com/NVIDIA/TensorRT-LLM.git

@@ -58,26 +61,24 @@ python3 scripts/build_wheel.py --clean --trt_root /usr/local/tensorrt -a native
 cp build/tensorrt_llm-*.whl /home
 ```
-- Build the Dynamo container
+#### Step 2: Copy the TRT-LLM wheel to the dynamo repository
 ```bash
-# Build image
+cp /home/tensorrt_llm-*.whl /<path-to-repo>/dynamo/trtllm_wheel/
-./container/build.sh --base-image gitlab-master.nvidia.com:5005/dl/dgx/tritonserver/tensorrt-llm/amd64 --base-image-tag krish-fix-trtllm-build.23766174
 ```
-Alternatively, you can build with latest tensorrt_llm pipeline like below:
+#### Step 3: Build the container
 ```bash
 # Build image
-./container/build.sh --framework TENSORRTLLM --skip-clone-tensorrtllm 1 --base-image urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/release --base-image-tag main
+./container/build.sh --framework TENSORRTLLM --tensorrtllm-pip-wheel-path trtllm_wheel
 ```
+**Note:** If you are using the latest tensorrt_llm image, you do not need to install the TRT-LLM wheel.
+We need to copy the TRT-LLM wheel into the repository and point the build script to that path within the repository so that it can be picked up by the docker build context.
 ## Launching the Environment
 ```
 # Run image interactively from with the Dynamo root directory.
-./container/run.sh --framework TENSORRTLLM -it -v /home/:/home/
+./container/run.sh --framework TENSORRTLLM -it
-# Install the TRT-LLM wheel. No need to do this if you are using the latest tensorrt_llm image.
-pip install /home/tensorrt_llm-*.whl
 ```
 ## Deployment Options
...
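Taken together, the simplified flow from the updated README boils down to the following sequence (a minimal sketch; `/path/to/dynamo` and the wheel location are placeholders, and Step 1 is abbreviated):

```bash
# Step 1: build the TensorRT-LLM wheel from the latest main (abbreviated; see the README diff above)
git clone https://github.com/NVIDIA/TensorRT-LLM.git
# ... scripts/build_wheel.py produces build/tensorrt_llm-*.whl, copied to /home ...

# Step 2: copy the wheel into the dynamo repo so it lands inside the docker build context
cp /home/tensorrt_llm-*.whl /path/to/dynamo/trtllm_wheel/

# Step 3: build the Dynamo container, pointing the build script at the wheel directory
cd /path/to/dynamo
./container/build.sh --framework TENSORRTLLM --tensorrtllm-pip-wheel-path trtllm_wheel

# Launch the environment interactively (the wheel is already installed in the image)
./container/run.sh --framework TENSORRTLLM -it
```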