Commit 20b36843 authored by Tanmay Verma, committed by GitHub

chore: Update base from 24.12 to 25.01 Triton version (#100)

parent 21a8a79c
@@ -14,7 +14,7 @@
 # limitations under the License.
 ARG BASE_IMAGE="nvcr.io/nvidia/tritonserver"
-ARG BASE_IMAGE_TAG="24.12-py3"
+ARG BASE_IMAGE_TAG="25.01-py3"
 ARG VLLM_WHEEL
 FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS triton-distributed
@@ -34,12 +34,11 @@ ENV PATH="/root/.cargo/bin:${PATH}"
 # Install OpenAI-compatible frontend and its dependencies from triton server
 # repository. These are used to have a consistent interface, schema, and FastAPI
 # app between Triton Core and Triton Distributed implementations.
-# NOTE: Current commit is == r24.12 + enum serialization fix
-ARG SERVER_OPENAI_COMMIT="2ebd762fa6c7b829e7d04bfaf80c8400a09d3767"
+ARG OPENAI_SERVER_TAG="r25.01"
 RUN mkdir -p /opt/tritonserver/python && \
     cd /opt/tritonserver/python && \
     rm -rf openai && \
-    git clone https://github.com/triton-inference-server/server.git && \
+    git clone -b ${OPENAI_SERVER_TAG} --single-branch https://github.com/triton-inference-server/server.git && \
     cd server && \
     git checkout ${SERVER_OPENAI_COMMIT} && \
     cd .. && \
@@ -66,7 +65,7 @@ RUN find /opt/tritonserver/python -maxdepth 1 -type f -name \
     "tritonserver-*.whl" | xargs -I {} pip3 install --force-reinstall --upgrade {}[all]
 # GENAI Perf Install
-ARG GENAI_PERF_TAG="r24.12"
+ARG GENAI_PERF_TAG="r25.01"
 RUN pip install "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf"
 # Backend & Framework Specific Installation
...
@@ -52,16 +52,16 @@ BUILD_CONTEXT=$(dirname "$(readlink -f "$SOURCE_DIR")")
 # Base Images
-STANDARD_BASE_VERSION=24.12
+STANDARD_BASE_VERSION=25.01
 STANDARD_BASE_IMAGE=nvcr.io/nvidia/tritonserver
 STANDARD_BASE_IMAGE_TAG=${STANDARD_BASE_VERSION}-py3
-TENSORRTLLM_BASE_VERSION=24.12
+TENSORRTLLM_BASE_VERSION=25.01
 TENSORRTLLM_BASE_IMAGE=nvcr.io/nvidia/tritonserver
 TENSORRTLLM_BASE_IMAGE_TAG=${TENSORRTLLM_BASE_VERSION}-trtllm-python-py3
 # IMPORTANT NOTE: Ensure the repo tag complies with the TRTLLM backend version
 # used in the base image above.
-TENSORRTLLM_BACKEND_REPO_TAG=v0.16.0
+TENSORRTLLM_BACKEND_REPO_TAG=v0.17.0
 # Set this as 1 to rebuild and replace trtllm backend bits in the container.
 # This will allow building triton distributed container image with custom
 # trt-llm backend repo branch.
@@ -69,7 +69,7 @@ TENSORRTLLM_BACKEND_REBUILD=0
 # vllm installation is done later in the Dockerfile so it will overwrite the
 # vllm version installed in the base image.
-VLLM_BASE_VERSION=24.12
+VLLM_BASE_VERSION=25.01
 VLLM_BASE_IMAGE=nvcr.io/nvidia/tritonserver
 VLLM_BASE_IMAGE_TAG=${VLLM_BASE_VERSION}-vllm-python-py3
...
@@ -32,3 +32,4 @@ transformers
 tritonclient==2.53.0
 # TODO: See whether TRT-LLM installs a different version of UCX. Need to revisit and track this dependency.
 ucx-py-cu12
+uvicorn
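
For context on how these version bumps take effect: the values updated above are plain Dockerfile ARGs, so the image can also be built by passing them explicitly to `docker build`. A minimal sketch under assumptions: the `-f` path and `-t` image name are illustrative placeholders, not taken from this commit, and the repository's own build script remains the supported entry point. BASE_IMAGE, BASE_IMAGE_TAG, OPENAI_SERVER_TAG, and GENAI_PERF_TAG are the ARGs defined in the Dockerfile shown in this diff.

    # Hypothetical manual build of the standard image with the 25.01 base.
    # The build-arg names come from the Dockerfile above; path and tag are placeholders.
    docker build \
      --build-arg BASE_IMAGE=nvcr.io/nvidia/tritonserver \
      --build-arg BASE_IMAGE_TAG=25.01-py3 \
      --build-arg OPENAI_SERVER_TAG=r25.01 \
      --build-arg GENAI_PERF_TAG=r25.01 \
      -f container/Dockerfile \
      -t triton-distributed:25.01 \
      .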