"vscode:/vscode.git/clone" did not exist on "6d4475db4d97147db11d8b1bd9b8bb156870d874"
Commit 20b36843 authored by Tanmay Verma, committed by GitHub
Browse files

chore: Update base from 24.12 to 25.01 Triton version (#100)

parent 21a8a79c
......@@ -14,7 +14,7 @@
# limitations under the License.
ARG BASE_IMAGE="nvcr.io/nvidia/tritonserver"
ARG BASE_IMAGE_TAG="24.12-py3"
ARG BASE_IMAGE_TAG="25.01-py3"
ARG VLLM_WHEEL
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS triton-distributed
......@@ -34,12 +34,11 @@ ENV PATH="/root/.cargo/bin:${PATH}"
# Install OpenAI-compatible frontend and its dependencies from triton server
# repository. These are used to have a consistent interface, schema, and FastAPI
# app between Triton Core and Triton Distributed implementations.
# NOTE: Current commit is == r24.12 + enum serialization fix
ARG SERVER_OPENAI_COMMIT="2ebd762fa6c7b829e7d04bfaf80c8400a09d3767"
ARG OPENAI_SERVER_TAG="r25.01"
RUN mkdir -p /opt/tritonserver/python && \
cd /opt/tritonserver/python && \
rm -rf openai && \
git clone https://github.com/triton-inference-server/server.git && \
git clone -b ${OPENAI_SERVER_TAG} --single-branch https://github.com/triton-inference-server/server.git && \
cd server && \
git checkout ${SERVER_OPENAI_COMMIT} && \
cd .. && \
......@@ -66,7 +65,7 @@ RUN find /opt/tritonserver/python -maxdepth 1 -type f -name \
"tritonserver-*.whl" | xargs -I {} pip3 install --force-reinstall --upgrade {}[all]
# GENAI Perf Install
ARG GENAI_PERF_TAG="r24.12"
ARG GENAI_PERF_TAG="r25.01"
RUN pip install "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf"
# Backend & Framework Specific Installation
......
......@@ -52,16 +52,16 @@ BUILD_CONTEXT=$(dirname "$(readlink -f "$SOURCE_DIR")")
# Base Images
STANDARD_BASE_VERSION=24.12
STANDARD_BASE_VERSION=25.01
STANDARD_BASE_IMAGE=nvcr.io/nvidia/tritonserver
STANDARD_BASE_IMAGE_TAG=${STANDARD_BASE_VERSION}-py3
TENSORRTLLM_BASE_VERSION=24.12
TENSORRTLLM_BASE_VERSION=25.01
TENSORRTLLM_BASE_IMAGE=nvcr.io/nvidia/tritonserver
TENSORRTLLM_BASE_IMAGE_TAG=${TENSORRTLLM_BASE_VERSION}-trtllm-python-py3
# IMPORTANT NOTE: Ensure the repo tag complies with the TRTLLM backend version
# used in the base image above.
TENSORRTLLM_BACKEND_REPO_TAG=v0.16.0
TENSORRTLLM_BACKEND_REPO_TAG=v0.17.0
# Set this to 1 to rebuild and replace the TRT-LLM backend bits in the container.
# This allows building the Triton Distributed container image with a custom
# TRT-LLM backend repo branch.
......@@ -69,7 +69,7 @@ TENSORRTLLM_BACKEND_REBUILD=0
# vllm installation is done later in the Dockerfile so it will overwrite the
# vllm version installed in the base image.
VLLM_BASE_VERSION=24.12
VLLM_BASE_VERSION=25.01
VLLM_BASE_IMAGE=nvcr.io/nvidia/tritonserver
VLLM_BASE_IMAGE_TAG=${VLLM_BASE_VERSION}-vllm-python-py3
......
......@@ -32,3 +32,4 @@ transformers
tritonclient==2.53.0
# TODO: See whether TRT-LLM installs a different version of UCX. Need to revisit and track this dependency.
ucx-py-cu12
uvicorn
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment