OpenDAS / dynamo

Commit d39695b7: "build: adding in container build scripts"
Authored Jan 08, 2025 by Neelay Shah, committed by GitHub on Jan 08, 2025
Parent: 1e4ef91a

Showing 11 changed files with 556 additions and 13 deletions (+556, -13)
.dockerignore                                 +19    -0
README.md                                     +19   -13
container/Dockerfile                         +113    -0
container/build.sh                           +247    -0
container/deps/clone_tensorrtllm.sh          +102    -0
container/deps/requirements.nats.txt           +4    -0
container/deps/requirements.standard.txt       +4    -0
container/deps/requirements.tensorrtllm.txt    +9    -0
container/deps/requirements.test.txt           +9    -0
container/deps/requirements.txt               +21    -0
container/deps/requirements.vllm.txt           +9    -0
.dockerignore (new file, mode 100644)

```
**/*.onnx
**/*.plan
**/*.onnx
**/*.plan
**/.cache/*
**/*onnx*
**/*engine*
**/*pytorch_model*
**/*.pth*
**/*.pt
**/*.models/*
**/*.model-store/*
**/*.model.*/*
**/*.cache/*
**/*.libtorch_model_store/*
**/.git
**/.github
**/*backup*/
.dockerignore
```
README.md (changed)

```markdown
<!--
SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES.
All rights reserved.
SPDX-License-Identifier: Apache-2.0

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

...

@@ -35,6 +23,24 @@ center scale without sacrificing performance or ease of use.

> rapid-prototyping stage and we are actively looking for feedback and
> collaborators.

## Building Triton Distributed

Triton Distributed development and examples are container based. You can
build the Triton Distributed container using the build scripts in
`container/`.

We provide 3 types of builds:

1. `STANDARD` which includes our default set of backends (onnx, openvino...)
2. `TENSORRTLLM` which includes our TRT-LLM backend
3. `VLLM` which includes our VLLM backend

For example, if you want to build a container for the `VLLM` backend you can
run `./container/build.sh --framework VLLM`.

Please see the instructions in the corresponding example for specific build
instructions.

<!--
## Goals
...
```
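When `--tag` is not passed, `build.sh` names the resulting image from the lowercased framework name and the base-image version. A minimal standalone sketch of that tag construction (bash 4+ for `${VAR,,}`; the values mirror the defaults in `container/build.sh`):

```shell
# Default image tag construction, as done at the end of build.sh's get_options.
FRAMEWORK="VLLM"        # as passed via --framework (already uppercased)
BASE_VERSION="24.12"    # VLLM_BASE_VERSION default in build.sh

TAG="triton-distributed:${FRAMEWORK,,}-${BASE_VERSION}"   # ,, lowercases
echo "$TAG"   # triton-distributed:vllm-24.12
```

So a `VLLM` build with no explicit tag produces an image named `triton-distributed:vllm-24.12`.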
container/Dockerfile (new file, mode 100644)

```dockerfile
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: Apache-2.0

ARG BASE_IMAGE="nvcr.io/nvidia/tritonserver"
ARG BASE_IMAGE_TAG="24.12-py3"

FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS triton-distributed

# TODO: non-root user by default
USER root

# TODO: separate dev from runtime dependencies
RUN apt-get update; apt-get install -y gdb

# Common dependencies
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
    pip install --timeout=2000 --requirement /tmp/requirements.txt

RUN --mount=type=bind,source=./container/deps/requirements.nats.txt,target=/tmp/requirements.txt \
    pip install --timeout=2000 --requirement /tmp/requirements.txt

RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
    pip install --timeout=2000 --requirement /tmp/requirements.txt

# Finish pyright install
RUN pyright --help > /dev/null 2>&1

# In-process Python API install
RUN find /opt/tritonserver/python -maxdepth 1 -type f -name \
    "tritonserver-*.whl" | xargs -I {} pip3 install --force-reinstall --upgrade {}[all]

# GenAI-Perf install
ARG GENAI_PERF_TAG="r24.12"
RUN pip install "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf"

# Backend & framework specific installation
ARG FRAMEWORK="STANDARD"
ARG TENSORRTLLM_BACKEND_COMMIT=
RUN --mount=type=bind,source=./container/deps/requirements.tensorrtllm.txt,target=/tmp/requirements.txt \
    --mount=type=bind,source=./container/deps/clone_tensorrtllm.sh,target=/tmp/clone_tensorrtllm.sh \
    if [[ "$FRAMEWORK" == "TENSORRTLLM" ]]; then \
        pip install --timeout=2000 -r /tmp/requirements.txt; \
        /tmp/clone_tensorrtllm.sh --tensorrtllm-backend-commit ${TENSORRTLLM_BACKEND_COMMIT}; \
    fi

RUN --mount=type=bind,source=./container/deps/requirements.vllm.txt,target=/tmp/requirements.txt \
    if [[ "$FRAMEWORK" == "VLLM" ]]; then \
        pip install --timeout=2000 -r /tmp/requirements.txt; \
    fi

RUN --mount=type=bind,source=./container/deps/requirements.standard.txt,target=/tmp/requirements.txt \
    if [[ "$FRAMEWORK" == "STANDARD" ]]; then \
        pip install --timeout=2000 -r /tmp/requirements.txt; \
    fi

# Backend & framework specific LD_LIBRARY_PATH
ARG TENSORRTLLM_FRAMEWORK
ENV FRAMEWORK_LD_LIBRARY_PATH=${TENSORRTLLM_FRAMEWORK:+/opt/tritonserver/backends/tensorrtllm/}
ENV LD_LIBRARY_PATH=${FRAMEWORK_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}
ENV TENSORRTLLM_BACKEND_COMMIT=$TENSORRTLLM_BACKEND_COMMIT

# Install NATS - pointing toward NATS GitHub instead of binaries.nats.dev
# due to server instability
RUN wget https://github.com/nats-io/nats-server/releases/download/v2.10.24/nats-server-v2.10.24-amd64.deb && \
    dpkg -i nats-server-v2.10.24-amd64.deb

# Enable Git operations in the /workspace directory.
RUN printf "[safe]\ndirectory=/workspace\n" > /root/.gitconfig

# emacs docker-tramp requires /bin/sh to be linked to bash to operate correctly
RUN ln -sf /bin/bash /bin/sh

# Install NGINX
RUN apt-get install nginx -y
RUN rm -rf /etc/nginx/sites-enabled/default

# Install demo utils
RUN apt-get install nvtop -y
RUN apt-get install tmux -y

# Install OpenAI-compatible frontend and its dependencies
# NOTE: Has a couple of compatibility fixes needed for KServe frontends in the
# 24.11 release. Can be replaced with a specific release like r24.12 in the
# future for stability.
ARG SERVER_OPENAI_COMMIT="f336fa6bd5416ba2f17e5eb7de5228213562bbc8"
WORKDIR /opt/tritonserver
RUN git clone https://github.com/triton-inference-server/server.git && \
    cd server && \
    git checkout ${SERVER_OPENAI_COMMIT} && \
    cd .. && \
    mv server/python/openai openai && \
    chown -R root:root openai && \
    chmod 755 openai && \
    chmod -R go-w openai && \
    rm -rf server && \
    python3 -m pip install -r openai/requirements.txt

##########################################################
#                      Tokenizers                        #
##########################################################
# TODO: Pull in from network and not local to repo
#
# RUN --mount=type=bind,source=./container/deps/llama3-tok.tgz,target=/tmp/llama3-tok.tgz \
#     mkdir -p /genai-perf/tokenizers && cd /genai-perf/tokenizers && tar -xzf /tmp/llama3-tok.tgz

# Working directory
WORKDIR /workspace

# TODO: Exclude container directory
COPY . /workspace

# TODO: Protobuf
# RUN /workspace/icp/protos/gen_python.sh

# TODO: Sets PYTHONPATH for python modules
# ENV PYTHONPATH="${PYTHONPATH}:/workspace/icp/python:/workspace/icp/python/icp/protos:/workspace/worker/python:/workspace/triton/api_server/src/:/workspaces/tekit_backend/tensorrt_llm"

# Command and entrypoint
CMD []
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
```
container/build.sh
0 → 100755
View file @
d39695b7
#!/bin/bash -e
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: Apache-2.0
TAG
=
RUN_PREFIX
=
PLATFORM
=
linux/amd64
# Frameworks
#
# Each framework has a corresponding base image. Additional
# dependencies are specified in the /container/deps folder and
# installed within framework specific sections of the Dockerfile.
declare
-A
FRAMEWORKS
=([
"STANDARD"
]=
1
[
"TENSORRTLLM"
]=
2
[
"VLLM"
]=
3
)
DEFAULT_FRAMEWORK
=
STANDARD
SOURCE_DIR
=
$(
dirname
"
$(
readlink
-f
"
$0
"
)
"
)
DOCKERFILE
=
${
SOURCE_DIR
}
/Dockerfile
BUILD_CONTEXT
=
$(
dirname
"
$(
readlink
-f
"
$SOURCE_DIR
"
)
"
)
# Base Images
STANDARD_BASE_VERSION
=
24.12
STANDARD_BASE_IMAGE
=
nvcr.io/nvidia/tritonserver
STANDARD_BASE_IMAGE_TAG
=
${
STANDARD_BASE_VERSION
}
-py3
TENSORRTLLM_BASE_VERSION
=
24.12
TENSORRTLLM_BASE_IMAGE
=
nvcr.io/nvidia/tritonserver
TENSORRTLLM_BASE_IMAGE_TAG
=
${
TENSORRTLLM_BASE_VERSION
}
-trtllm-python-py3
# IMPORTANT NOTE: Ensure the commit matches the TRTLLM backend version used in the base image above
TENSORRTLLM_BACKEND_COMMIT
=
v0.16.0
VLLM_BASE_VERSION
=
24.12
VLLM_BASE_IMAGE
=
nvcr.io/nvidia/tritonserver
VLLM_BASE_IMAGE_TAG
=
${
VLLM_BASE_VERSION
}
-vllm-python-py3
get_options
()
{
while
:
;
do
case
$1
in
-h
|
-
\?
|
--help
)
show_help
exit
;;
--platform
)
if
[
"
$2
"
]
;
then
PLATFORM
=
$2
shift
else
missing_requirement
$1
fi
;;
--framework
)
if
[
"
$2
"
]
;
then
FRAMEWORK
=
$2
shift
else
missing_requirement
$1
fi
;;
--tensorrtllm-backend-commit
)
if
[
"
$2
"
]
;
then
TRTLLM_BACKEND_COMMIT
=
$2
shift
else
missing_requirement
$1
fi
;;
--base-image
)
if
[
"
$2
"
]
;
then
BASE_IMAGE
=
$2
shift
else
missing_requirement
$1
fi
;;
--base-image-tag
)
if
[
"
$2
"
]
;
then
BASE_IMAGE_TAG
=
$2
shift
else
missing_requirement
$1
fi
;;
--build-arg
)
if
[
"
$2
"
]
;
then
BUILD_ARGS+
=
"--build-arg
$2
"
shift
else
missing_requirement
$1
fi
;;
--tag
)
if
[
"
$2
"
]
;
then
TAG
=
$2
shift
else
missing_requirement
$1
fi
;;
--dry-run
)
RUN_PREFIX
=
"echo"
echo
""
echo
"=============================="
echo
"DRY RUN: COMMANDS PRINTED ONLY"
echo
"=============================="
echo
""
;;
--no-cache
)
NO_CACHE
=
" --no-cache"
;;
--
)
shift
break
;;
-?
*
)
error
'ERROR: Unknown option: '
$1
;;
?
*
)
error
'ERROR: Unknown option: '
$1
;;
*
)
break
;;
esac
shift
done
if
[
-z
"
$FRAMEWORK
"
]
;
then
FRAMEWORK
=
$DEFAULT_FRAMEWORK
fi
if
[
!
-z
"
$FRAMEWORK
"
]
;
then
FRAMEWORK
=
${
FRAMEWORK
^^
}
if
[[
!
-n
"
${
FRAMEWORKS
[
$FRAMEWORK
]
}
"
]]
;
then
error
'ERROR: Unknown framework: '
$FRAMEWORK
fi
if
[
-z
$BASE_IMAGE_TAG
]
;
then
BASE_IMAGE_TAG
=
${
FRAMEWORK
}
_BASE_IMAGE_TAG
BASE_IMAGE_TAG
=
${
!BASE_IMAGE_TAG
}
fi
if
[
-z
$BASE_IMAGE
]
;
then
BASE_IMAGE
=
${
FRAMEWORK
}
_BASE_IMAGE
BASE_IMAGE
=
${
!BASE_IMAGE
}
fi
if
[
-z
$BASE_IMAGE
]
;
then
error
"ERROR: Framework
$FRAMEWORK
without BASE_IMAGE"
fi
BASE_VERSION
=
${
FRAMEWORK
}
_BASE_VERSION
BASE_VERSION
=
${
!BASE_VERSION
}
fi
if
[
-z
"
$TAG
"
]
;
then
TAG
=
"triton-distributed:
${
FRAMEWORK
,,
}
-
${
BASE_VERSION
}
"
fi
if
[
!
-z
"
$PLATFORM
"
]
;
then
PLATFORM
=
"--platform
${
PLATFORM
}
"
fi
}
show_image_options
()
{
echo
""
echo
"Building Triton Distributed Image: '
${
TAG
}
'"
echo
""
echo
" Base: '
${
BASE_IMAGE
}
'"
echo
" Base_Image_Tag: '
${
BASE_IMAGE_TAG
}
'"
if
[[
$FRAMEWORK
==
"TENSORRTLLM"
]]
;
then
echo
" Tensorrtllm Backend Commit: '
${
TENSORRTLLM_BACKEND_COMMIT
}
'"
fi
echo
" Build Context: '
${
BUILD_CONTEXT
}
'"
echo
" Build Arguments: '
${
BUILD_ARGS
}
'"
echo
" Framework: '
${
FRAMEWORK
}
'"
echo
""
}
show_help
()
{
echo
"usage: build.sh"
echo
" [--base base image]"
echo
" [--base-imge-tag base image tag]"
echo
" [--platform platform for docker build"
echo
" [--framework framework one of
${
!FRAMEWORKS[@]
}
]"
echo
" [--tensorrtllm-backend-commit commit or tag]"
echo
" [--build-arg additional build args to pass to docker build]"
echo
" [--tag tag for image]"
echo
" [--no-cache disable docker build cache]"
echo
" [--dry-run print docker commands without running]"
exit
0
}
missing_requirement
()
{
error
"ERROR:
$1
requires an argument."
}
error
()
{
printf
'%s %s\n'
"
$1
"
"
$2
"
>
&2
exit
1
}
get_options
"
$@
"
# BUILD DEV IMAGE
BUILD_ARGS+
=
" --build-arg BASE_IMAGE=
$BASE_IMAGE
--build-arg BASE_IMAGE_TAG=
$BASE_IMAGE_TAG
--build-arg FRAMEWORK=
$FRAMEWORK
--build-arg
${
FRAMEWORK
}
_FRAMEWORK=1"
if
[
!
-z
${
GITHUB_TOKEN
}
]
;
then
BUILD_ARGS+
=
" --build-arg GITHUB_TOKEN=
${
GITHUB_TOKEN
}
"
fi
if
[
!
-z
${
GITLAB_TOKEN
}
]
;
then
BUILD_ARGS+
=
" --build-arg GITLAB_TOKEN=
${
GITLAB_TOKEN
}
"
fi
if
[[
$FRAMEWORK
==
"TENSORRTLLM"
]]
&&
[
!
-z
${
TENSORRTLLM_BACKEND_COMMIT
}
]
;
then
BUILD_ARGS+
=
" --build-arg TENSORRTLLM_BACKEND_COMMIT=
${
TENSORRTLLM_BACKEND_COMMIT
}
"
fi
if
[
!
-z
${
HF_TOKEN
}
]
;
then
BUILD_ARGS+
=
" --build-arg HF_TOKEN=
${
HF_TOKEN
}
"
fi
show_image_options
if
[
-z
"
$RUN_PREFIX
"
]
;
then
set
-x
fi
$RUN_PREFIX
docker build
-f
$DOCKERFILE
$PLATFORM
$BUILD_ARGS
-t
$TAG
$BUILD_CONTEXT
$NO_CACHE
{
set
+x
;
}
2>/dev/null
if
[
-z
"
$RUN_PREFIX
"
]
;
then
set
-x
fi
{
set
+x
;
}
2>/dev/null
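A detail worth calling out in `get_options` above: the per-framework defaults are looked up by building a variable *name* from the framework string and then dereferencing it with bash indirect expansion (`${!var}`). A minimal standalone sketch of that lookup, with a value mirroring the script's `VLLM` default:

```shell
# Indirect variable lookup as used in build.sh's get_options (bash >= 4).
VLLM_BASE_IMAGE_TAG="24.12-vllm-python-py3"   # per-framework default

FRAMEWORK="vllm"
FRAMEWORK=${FRAMEWORK^^}                      # ^^ uppercases: VLLM

BASE_IMAGE_TAG=${FRAMEWORK}_BASE_IMAGE_TAG    # builds the *name*: VLLM_BASE_IMAGE_TAG
BASE_IMAGE_TAG=${!BASE_IMAGE_TAG}             # ${!var} dereferences that name
echo "$BASE_IMAGE_TAG"                        # 24.12-vllm-python-py3
```

This is why adding a new framework only requires defining the `<NAME>_BASE_VERSION`, `<NAME>_BASE_IMAGE`, and `<NAME>_BASE_IMAGE_TAG` variables and registering the name in the `FRAMEWORKS` array.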
container/deps/clone_tensorrtllm.sh (new file, mode 100755)

```bash
#!/bin/bash -e

# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: Apache-2.0

TENSORRTLLM_BACKEND_COMMIT=
GIT_TOKEN=
GIT_REPO=

get_options() {
    while :; do
        case $1 in
        -h | -\? | --help)
            show_help
            exit
            ;;
        --tensorrtllm-backend-commit)
            if [ "$2" ]; then
                TENSORRTLLM_BACKEND_COMMIT=$2
                shift
            else
                missing_requirement $1
            fi
            ;;
        --git-token)
            if [ "$2" ]; then
                GIT_TOKEN=$2
                shift
            else
                missing_requirement $1
            fi
            ;;
        --git-repo)
            if [ "$2" ]; then
                GIT_REPO=$2
                shift
            else
                missing_requirement $1
            fi
            ;;
        -?*)
            error 'ERROR: Unknown option: ' $1
            ;;
        ?*)
            error 'ERROR: Unknown option: ' $1
            ;;
        *)
            break
            ;;
        esac
        shift
    done
}

show_options() {
    echo ""
    echo "Getting TENSORRTLLM Backend Repo"
    echo ""
    echo "   TENSORRTLLM Backend Commit: '${TENSORRTLLM_BACKEND_COMMIT}'"
    echo ""
}

show_help() {
    echo "usage: clone_tensorrtllm.sh"
    echo "  [--tensorrtllm-backend-commit commit]"
    echo "  [--git-token git-token]"
    echo "  [--git-repo git-repo]"
    exit 0
}

missing_requirement() {
    error "ERROR: $1 requires an argument."
}

error() {
    printf '%s %s\n' "$1" "$2" >&2
    exit 1
}

get_options "$@"

if [ -z ${GIT_REPO} ]; then
    GIT_REPO="github.com/triton-inference-server/tensorrtllm_backend"
fi

if [ ! -z ${GIT_TOKEN} ]; then
    GIT_REPO="https://oauth2:${GIT_TOKEN}@${GIT_REPO}"
else
    GIT_REPO="https://${GIT_REPO}"
fi

show_options

git clone ${GIT_REPO}
cd tensorrtllm_backend
git reset --hard ${TENSORRTLLM_BACKEND_COMMIT}
git submodule update --init --recursive
git lfs install
git lfs pull
cd ..
mv tensorrtllm_backend /
```
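The clone URL logic above supports both anonymous and token-authenticated clones: with `--git-token`, the token is embedded as HTTPS basic-auth credentials (`oauth2:<token>@`), otherwise the repo is cloned anonymously over HTTPS. A standalone sketch of just that string construction (the token value here is a hypothetical placeholder):

```shell
# Remote URL construction as in clone_tensorrtllm.sh.
GIT_REPO="github.com/triton-inference-server/tensorrtllm_backend"
GIT_TOKEN="example-token"   # hypothetical; supplied via --git-token

if [ ! -z "${GIT_TOKEN}" ]; then
    GIT_REPO="https://oauth2:${GIT_TOKEN}@${GIT_REPO}"   # authenticated clone
else
    GIT_REPO="https://${GIT_REPO}"                       # anonymous clone
fi
echo "$GIT_REPO"
```

Note that a token passed this way ends up in the process's argument list and the constructed URL, so this path is intended for CI-style builds rather than interactive use.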
container/deps/requirements.nats.txt (new file, mode 100644)

```
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: Apache-2.0

nats-py
```

container/deps/requirements.standard.txt (new file, mode 100644)

```
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: Apache-2.0

ucx-py-cu12
```

container/deps/requirements.tensorrtllm.txt (new file, mode 100644)

```
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: Apache-2.0

fastapi==0.111.0
openai==1.39.0
pydantic==2.7.1
transformers==4.45.2
triton==3.1.0
```

container/deps/requirements.test.txt (new file, mode 100644)

```
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: Apache-2.0

pytest
pytest-asyncio
pytest-benchmark
pytest-cov
pytest-timeout
```

container/deps/requirements.txt (new file, mode 100644)

```
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: Apache-2.0

fastapi==0.111.0
ftfy
grpcio-tools==1.66.0
httpx
mypy
numpy
opentelemetry-api
opentelemetry-sdk
pre-commit
protobuf==5.27.3
pyright
sentencepiece
starlette==0.27.0
transformers
tritonclient==2.53.0
# TODO: See whether TRT-LLM installs a different version of UCX. Need to revisit and track this dependency.
ucx-py-cu12
uvicorn==0.30.6
```

container/deps/requirements.vllm.txt (new file, mode 100644)

```
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: Apache-2.0

# Necessary for vLLM engine.
--extra-index-url https://flashinfer.ai/whl/cu121/torch2.4
flashinfer
ucx-py-cu12
# TODO update to branch / fork
vllm==0.6.6.post1
```