Commit 20b36843 authored by Tanmay Verma, committed by GitHub

chore: Update base from 24.12 to 25.01 Triton version (#100)

parent 21a8a79c
@@ -14,7 +14,7 @@
 # limitations under the License.
 ARG BASE_IMAGE="nvcr.io/nvidia/tritonserver"
-ARG BASE_IMAGE_TAG="24.12-py3"
+ARG BASE_IMAGE_TAG="25.01-py3"
 ARG VLLM_WHEEL
 FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS triton-distributed
@@ -34,12 +34,11 @@ ENV PATH="/root/.cargo/bin:${PATH}"
 # Install OpenAI-compatible frontend and its dependencies from triton server
 # repository. These are used to have a consistent interface, schema, and FastAPI
 # app between Triton Core and Triton Distributed implementations.
-# NOTE: Current commit is == r24.12 + enum serialization fix
-ARG SERVER_OPENAI_COMMIT="2ebd762fa6c7b829e7d04bfaf80c8400a09d3767"
+ARG OPENAI_SERVER_TAG="r25.01"
 RUN mkdir -p /opt/tritonserver/python && \
     cd /opt/tritonserver/python && \
     rm -rf openai && \
-    git clone https://github.com/triton-inference-server/server.git && \
+    git clone -b ${OPENAI_SERVER_TAG} --single-branch https://github.com/triton-inference-server/server.git && \
     cd server && \
     git checkout ${SERVER_OPENAI_COMMIT} && \
     cd .. && \
@@ -66,7 +65,7 @@ RUN find /opt/tritonserver/python -maxdepth 1 -type f -name \
     "tritonserver-*.whl" | xargs -I {} pip3 install --force-reinstall --upgrade {}[all]
 # GENAI Perf Install
-ARG GENAI_PERF_TAG="r24.12"
+ARG GENAI_PERF_TAG="r25.01"
 RUN pip install "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf"
 # Backend & Framework Specific Installation
...
@@ -52,16 +52,16 @@ BUILD_CONTEXT=$(dirname "$(readlink -f "$SOURCE_DIR")")
 # Base Images
-STANDARD_BASE_VERSION=24.12
+STANDARD_BASE_VERSION=25.01
 STANDARD_BASE_IMAGE=nvcr.io/nvidia/tritonserver
 STANDARD_BASE_IMAGE_TAG=${STANDARD_BASE_VERSION}-py3
-TENSORRTLLM_BASE_VERSION=24.12
+TENSORRTLLM_BASE_VERSION=25.01
 TENSORRTLLM_BASE_IMAGE=nvcr.io/nvidia/tritonserver
 TENSORRTLLM_BASE_IMAGE_TAG=${TENSORRTLLM_BASE_VERSION}-trtllm-python-py3
 # IMPORTANT NOTE: Ensure the repo tag complies with the TRTLLM backend version
 # used in the base image above.
-TENSORRTLLM_BACKEND_REPO_TAG=v0.16.0
+TENSORRTLLM_BACKEND_REPO_TAG=v0.17.0
 # Set this as 1 to rebuild and replace trtllm backend bits in the container.
 # This will allow building triton distributed container image with custom
 # trt-llm backend repo branch.
@@ -69,7 +69,7 @@ TENSORRTLLM_BACKEND_REBUILD=0
 # vllm installation is done later in the Dockerfile so it will overwrite the
 # vllm version installed in the base image.
-VLLM_BASE_VERSION=24.12
+VLLM_BASE_VERSION=25.01
 VLLM_BASE_IMAGE=nvcr.io/nvidia/tritonserver
 VLLM_BASE_IMAGE_TAG=${VLLM_BASE_VERSION}-vllm-python-py3
...
@@ -32,3 +32,4 @@ transformers
 tritonclient==2.53.0
 # TODO: See whether TRT-LLM installs a different version of UCX. Need to revisit and track this dependency.
 ucx-py-cu12
+uvicorn
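
For context on how these version bumps take effect: the values updated above are plain Dockerfile ARGs, so the image can also be built by passing them explicitly to `docker build`. A minimal sketch under assumptions: the `-f` path and `-t` image name are illustrative placeholders, not taken from this commit, and the repository's own build script remains the supported entry point. BASE_IMAGE, BASE_IMAGE_TAG, OPENAI_SERVER_TAG, and GENAI_PERF_TAG are the ARGs defined in the Dockerfile shown in this diff.

    # Hypothetical manual build of the standard image with the 25.01 base.
    # The build-arg names come from the Dockerfile above; path and tag are placeholders.
    docker build \
      --build-arg BASE_IMAGE=nvcr.io/nvidia/tritonserver \
      --build-arg BASE_IMAGE_TAG=25.01-py3 \
      --build-arg OPENAI_SERVER_TAG=r25.01 \
      --build-arg GENAI_PERF_TAG=r25.01 \
      -f container/Dockerfile \
      -t triton-distributed:25.01 \
      .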