Commit cd14a1c5 authored by Ryan McCormick's avatar Ryan McCormick Committed by GitHub
Browse files

refactor: Remove STANDARD and VLLM_NIXL choices from build/run (#148)

parent f04359cf
...@@ -48,14 +48,12 @@ Dynamo development and examples are container based. ...@@ -48,14 +48,12 @@ Dynamo development and examples are container based.
You can build the Dynamo container using the build scripts You can build the Dynamo container using the build scripts
in `container/` (or directly with `docker build`). in `container/` (or directly with `docker build`).
We provide 3 types of builds: We provide 2 types of builds:
1. `STANDARD` which includes our default set of backends (onnx, openvino...) 1. `VLLM` which includes our VLLM backend using new NIXL communication library.
2. `TENSORRTLLM` which includes our TRT-LLM backend 2. `TENSORRTLLM` which includes our TRT-LLM backend
3. `VLLM` which includes our VLLM backend using NCCL communication library.
4. `VLLM_NIXL` which includes our VLLM backend using new NIXL communication library.
For example, if you want to build a container for the `STANDARD` backends you can run For example, if you want to build a container for the `VLLM` backend you can run
<!--pytest.mark.skip--> <!--pytest.mark.skip-->
```bash ```bash
......
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# The base image is parameterized so build scripts can swap registry/tag
# without editing this file (before FROM is the one place ARG may precede it).
ARG BASE_IMAGE="nvcr.io/nvidia/tritonserver"
ARG BASE_IMAGE_TAG="25.01-py3"
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS dynamo
# TODO: non root user by default
USER root
# TODO: separate dev from runtime dependencies
# Rust build/dev dependencies — one package per line, sorted, for diffability.
RUN apt-get update && \
    apt-get install --no-install-recommends --yes \
        cmake \
        gdb \
        libssl-dev \
        pkg-config \
        protobuf-compiler
# Install rustup, put cargo on PATH, and pin the toolchain used for the build.
RUN curl https://sh.rustup.rs -sSf | bash -s -- -y
ENV PATH="/root/.cargo/bin:${PATH}"
RUN rustup toolchain install 1.85.0-x86_64-unknown-linux-gnu
# Install OpenAI-compatible frontend and its dependencies from triton server
# repository. These are used to have a consistent interface, schema, and FastAPI
# app between Triton Core and Dynamo implementations.
ARG OPENAI_SERVER_TAG="r25.01"
# Optional exact-commit pin within OPENAI_SERVER_TAG. Previously this ARG was
# never declared, so the unquoted `git checkout ${SERVER_OPENAI_COMMIT}` below
# expanded to a bare `git checkout` — a successful no-op — and the branch HEAD
# was used silently. The empty default preserves that behavior while making
# the pin overridable via --build-arg.
ARG SERVER_OPENAI_COMMIT=""
RUN mkdir -p /opt/tritonserver/python && \
    cd /opt/tritonserver/python && \
    rm -rf openai && \
    git clone -b ${OPENAI_SERVER_TAG} --single-branch https://github.com/triton-inference-server/server.git && \
    cd server && \
    git checkout ${SERVER_OPENAI_COMMIT} && \
    cd .. && \
    # Keep only the openai/ subtree; the rest of the server repo is not needed.
    mv server/python/openai openai && \
    chown -R root:root openai && \
    chmod 755 openai && \
    chmod -R go-w openai && \
    rm -rf server && \
    # --no-cache-dir keeps pip's download cache out of the image layer.
    python3 -m pip install --no-cache-dir -r openai/requirements.txt
# Common dependencies
# Requirements files are bind-mounted (never baked into a layer); adding
# --no-cache-dir keeps pip's download cache out of the image (hadolint DL3042).
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
    pip install --no-cache-dir --timeout=2000 --requirement /tmp/requirements.txt
RUN --mount=type=bind,source=./container/deps/requirements.nats.txt,target=/tmp/requirements.txt \
    pip install --no-cache-dir --timeout=2000 --requirement /tmp/requirements.txt
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
    pip install --no-cache-dir --timeout=2000 --requirement /tmp/requirements.txt
# Finish pyright install
# First invocation downloads pyright's backing node package; trigger it at
# build time so later runs need no network access.
RUN pyright --help > /dev/null 2>&1
# In Process Python API Install
# Install the tritonserver wheel shipped in the base image with [all] extras.
RUN find /opt/tritonserver/python -maxdepth 1 -type f -name \
    "tritonserver-*.whl" | xargs -I {} pip3 install --force-reinstall --upgrade {}[all]
# GENAI Perf Install
# TODO: Move to tag when fix for genai-perf will be released
ARG GENAI_PERF_TAG="25d0188713adc47868d6b3f22426375237a90529"
RUN pip install --no-cache-dir "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf"
# Backend & Framework Specific Installation
# NOTE(review): STANDARD was removed as a build/run choice in the scripts;
# this default and the guarded install below look like leftovers — confirm
# whether they can be dropped.
ARG FRAMEWORK="STANDARD"
# Use the POSIX test builtin. RUN lines execute under /bin/sh, which at this
# point in the build is presumably dash (the bash re-link happens later), so
# the previous bashism `[[ "$FRAMEWORK" == "STANDARD" ]]` was "command not
# found" (exit 127): the condition silently evaluated false and the
# requirements were never installed.
RUN --mount=type=bind,source=./container/deps/requirements.standard.txt,target=/tmp/requirements.txt \
    if [ "$FRAMEWORK" = "STANDARD" ]; then pip install --no-cache-dir --timeout=2000 -r /tmp/requirements.txt ; fi
# TODO set VLLM Version
# ENV VLLM_VERSION
ARG VLLM_FRAMEWORK
# DEFAULT VLLM VARIABLES
# `${VLLM_FRAMEWORK:+value}` expands to `value` only when VLLM_FRAMEWORK is
# set and non-empty, so these defaults apply to VLLM builds and stay empty
# strings otherwise.
# ENV VLLM_ATTENTION_BACKEND=${VLLM_FRAMEWORK:+FLASHINFER}
ENV VLLM_WORKER_MULTIPROC_METHOD=${VLLM_FRAMEWORK:+spawn}
ENV VLLM_TORCH_HOST=${VLLM_FRAMEWORK:+localhost}
ENV VLLM_TORCH_PORT=${VLLM_FRAMEWORK:+36183}
ENV VLLM_DATA_PLANE_BACKEND=${VLLM_FRAMEWORK:+nccl}
ENV VLLM_BASELINE_WORKERS=${VLLM_FRAMEWORK:+0}
ENV VLLM_CONTEXT_WORKERS=${VLLM_FRAMEWORK:+1}
ENV VLLM_GENERATE_WORKERS=${VLLM_FRAMEWORK:+1}
ENV VLLM_BASELINE_TP_SIZE=${VLLM_FRAMEWORK:+1}
ENV VLLM_CONTEXT_TP_SIZE=${VLLM_FRAMEWORK:+1}
ENV VLLM_GENERATE_TP_SIZE=${VLLM_FRAMEWORK:+1}
# Path where the C-API bindings library is packaged later in this Dockerfile.
ENV VLLM_KV_CAPI_PATH="/opt/dynamo/bindings/lib/libdynamo_llm_capi.so"
ENV PYTHONUNBUFFERED=1
# Install NATS - pointing toward NATS github instead of binaries.nats.dev due to server instability
RUN wget https://github.com/nats-io/nats-server/releases/download/v2.10.24/nats-server-v2.10.24-amd64.deb && \
    dpkg -i nats-server-v2.10.24-amd64.deb && \
    rm nats-server-v2.10.24-amd64.deb
# etcd
ENV ETCD_VERSION="v3.5.18"
RUN wget https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-amd64.tar.gz -O /tmp/etcd.tar.gz && \
    mkdir -p /usr/local/bin/etcd && \
    tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1 && \
    rm /tmp/etcd.tar.gz
ENV PATH=/usr/local/bin/etcd/:$PATH
# Enable Git operations in the /workspace directory.
RUN printf "[safe]\n directory=/workspace\n" > /root/.gitconfig
# emacs docker-tramp requires /bin/sh to be linked to bash to operate correctly
RUN ln -sf /bin/bash /bin/sh
# Install NGINX and demo utilities (nvtop, tmux) in a single layer:
# `apt-get update` must run in the same layer as the install (the package
# index cached by an earlier layer may be stale — the classic apt stale-index
# bug), and the lists are removed afterwards so the layer stays small.
RUN apt-get update && \
    apt-get install --no-install-recommends --yes \
        nginx \
        nvtop \
        tmux && \
    rm -rf /var/lib/apt/lists/*
RUN rm -rf /etc/nginx/sites-enabled/default
# Working directory
WORKDIR /workspace
# Copy Python wheel configuration files
# Manifests/lockfiles are copied before the source tree so the layers below
# stay cached when only source files change.
COPY pyproject.toml /workspace/
COPY README.md /workspace/
COPY LICENSE /workspace/
COPY Cargo.toml /workspace/
COPY Cargo.lock /workspace/
COPY rust-toolchain.toml /workspace/
# Optional cap on cargo's parallel jobs; when unset, cargo uses its default.
ARG CARGO_BUILD_JOBS
ENV CARGO_TARGET_DIR=/workspace/target
# Build Rust
COPY lib/ /workspace/lib/
COPY components /workspace/components
COPY launch /workspace/launch
# Release build (--locked enforces Cargo.lock) with all engine features,
# generate API docs, and install the CLI binaries onto PATH.
RUN cargo build --release --locked --features mistralrs,sglang,vllm,python && cargo doc --no-deps && \
cp target/release/dynamo-run /usr/local/bin/ && \
cp target/release/llmctl /usr/local/bin/
COPY deploy/dynamo/sdk /workspace/deploy/dynamo/sdk
# Install uv, create virtualenv for general use, and build dynamo wheel
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
# NOTE(review): `source` is a bashism; this RUN only works because /bin/sh was
# re-linked to bash in an earlier layer — confirm before reordering layers.
# Builds the main wheel from /workspace, then the SDK wheel from its subdir,
# installing both into the /opt/dynamo/venv virtualenv.
RUN mkdir /opt/dynamo && \
uv venv /opt/dynamo/venv --python 3.12 && \
source /opt/dynamo/venv/bin/activate && \
uv build --wheel --out-dir /workspace/dist && \
uv pip install /workspace/dist/ai_dynamo*cp312*.whl && \
cd /workspace/deploy/dynamo/sdk && \
uv build --wheel --out-dir /workspace/dist && \
uv pip install /workspace/dist/ai_dynamo_sdk*any.whl
# Package the bindings
# Bundle the wheel, the C-API shared library (the path VLLM_KV_CAPI_PATH
# points at), and the C headers under /opt/dynamo/bindings for consumers.
RUN mkdir -p /opt/dynamo/bindings/wheels && \
mkdir /opt/dynamo/bindings/lib && \
cp dist/ai_dynamo*cp312*.whl /opt/dynamo/bindings/wheels/. && \
cp target/release/libdynamo_llm_capi.so /opt/dynamo/bindings/lib/. && \
cp -r lib/bindings/c/include /opt/dynamo/bindings/.
# Install dynamo.runtime and dynamo.llm wheels globally in container for tests that
# currently run without virtual environment activated.
# TODO: In future, we may use a virtualenv for everything and remove this.
RUN cd /opt/dynamo/bindings/wheels && \
pip install ai_dynamo*cp312*.whl && \
pip install /workspace/dist/ai_dynamo_sdk*any.whl
# Copy everything in after install steps to avoid re-running build/install
# commands on unrelated changes in other dirs.
COPY . /workspace
# Enable system UCX
ENV RAPIDS_LIBUCX_PREFER_SYSTEM_LIBRARY=true
# Command and Entrypoint
# Exec-form ENTRYPOINT with an empty default CMD: arguments given to
# `docker run` are forwarded to the NVIDIA entrypoint script.
CMD []
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
...@@ -43,7 +43,7 @@ PYTHON_PACKAGE_VERSION=${current_tag:-$latest_tag.dev+$commit_id} ...@@ -43,7 +43,7 @@ PYTHON_PACKAGE_VERSION=${current_tag:-$latest_tag.dev+$commit_id}
# dependencies are specified in the /container/deps folder and # dependencies are specified in the /container/deps folder and
# installed within framework specific sections of the Dockerfile. # installed within framework specific sections of the Dockerfile.
declare -A FRAMEWORKS=(["STANDARD"]=1 ["TENSORRTLLM"]=2 ["VLLM"]=3 ["VLLM_NIXL"]=4) declare -A FRAMEWORKS=(["VLLM"]=1 ["TENSORRTLLM"]=2)
DEFAULT_FRAMEWORK=VLLM DEFAULT_FRAMEWORK=VLLM
SOURCE_DIR=$(dirname "$(readlink -f "$0")") SOURCE_DIR=$(dirname "$(readlink -f "$0")")
...@@ -51,12 +51,8 @@ DOCKERFILE=${SOURCE_DIR}/Dockerfile ...@@ -51,12 +51,8 @@ DOCKERFILE=${SOURCE_DIR}/Dockerfile
BUILD_CONTEXT=$(dirname "$(readlink -f "$SOURCE_DIR")") BUILD_CONTEXT=$(dirname "$(readlink -f "$SOURCE_DIR")")
# Base Images # Base Images
STANDARD_BASE_VERSION=25.01
STANDARD_BASE_IMAGE=nvcr.io/nvidia/tritonserver
STANDARD_BASE_IMAGE_TAG=${STANDARD_BASE_VERSION}-py3
TENSORRTLLM_BASE_VERSION=25.01 TENSORRTLLM_BASE_VERSION=25.01
# FIXME: Need a public image for public consumption
TENSORRTLLM_BASE_IMAGE="gitlab-master.nvidia.com:5005/dl/dgx/tritonserver/tensorrt-llm/amd64" TENSORRTLLM_BASE_IMAGE="gitlab-master.nvidia.com:5005/dl/dgx/tritonserver/tensorrt-llm/amd64"
TENSORRTLLM_BASE_IMAGE_TAG=krish-fix-trtllm-build.23766174 TENSORRTLLM_BASE_IMAGE_TAG=krish-fix-trtllm-build.23766174
TENSORRTLLM_PIP_WHEEL_PATH="" TENSORRTLLM_PIP_WHEEL_PATH=""
...@@ -194,10 +190,6 @@ get_options() { ...@@ -194,10 +190,6 @@ get_options() {
FRAMEWORK=$DEFAULT_FRAMEWORK FRAMEWORK=$DEFAULT_FRAMEWORK
fi fi
if [[ ${FRAMEWORK^^} == "VLLM_NIXL" ]]; then
FRAMEWORK="VLLM"
fi
if [ ! -z "$FRAMEWORK" ]; then if [ ! -z "$FRAMEWORK" ]; then
FRAMEWORK=${FRAMEWORK^^} FRAMEWORK=${FRAMEWORK^^}
......
...@@ -22,7 +22,7 @@ RUN_PREFIX= ...@@ -22,7 +22,7 @@ RUN_PREFIX=
# dependencies are specified in the /container/deps folder and # dependencies are specified in the /container/deps folder and
# installed within framework specific sections of the Dockerfile. # installed within framework specific sections of the Dockerfile.
declare -A FRAMEWORKS=(["STANDARD"]=1 ["TENSORRTLLM"]=2 ["VLLM"]=3 ["VLLM_NIXL"]=4) declare -A FRAMEWORKS=(["VLLM"]=1 ["TENSORRTLLM"]=2)
DEFAULT_FRAMEWORK=VLLM DEFAULT_FRAMEWORK=VLLM
SOURCE_DIR=$(dirname "$(readlink -f "$0")") SOURCE_DIR=$(dirname "$(readlink -f "$0")")
...@@ -170,10 +170,6 @@ get_options() { ...@@ -170,10 +170,6 @@ get_options() {
FRAMEWORK=$DEFAULT_FRAMEWORK FRAMEWORK=$DEFAULT_FRAMEWORK
fi fi
if [[ ${FRAMEWORK^^} == "VLLM_NIXL" ]]; then
FRAMEWORK="VLLM"
fi
if [ ! -z "$FRAMEWORK" ]; then if [ ! -z "$FRAMEWORK" ]; then
FRAMEWORK=${FRAMEWORK^^} FRAMEWORK=${FRAMEWORK^^}
if [[ ! -n "${FRAMEWORKS[$FRAMEWORK]}" ]]; then if [[ ! -n "${FRAMEWORKS[$FRAMEWORK]}" ]]; then
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment