Unverified Commit 6d2be143 authored by Biswa Panda's avatar Biswa Panda Committed by GitHub
Browse files

refactor: replace vllm with vllm_v1 container (#1953)


Co-authored-by: default avataralec-flowers <aflowers@nvidia.com>
parent 4d2a31ab
......@@ -69,7 +69,8 @@ RUN apt-get update -y && \
tmux \
vim \
autoconf \
libtool
libtool \
net-tools
# These headers are missing with the hpcx installer, required
# by UCX to find RDMA devices
......@@ -120,12 +121,21 @@ WORKDIR /workspace
# Copy nixl source, and use commit hash as cache hint
COPY --from=nixl_base /opt/nixl /opt/nixl
COPY --from=nixl_base /opt/nixl/commit.txt /opt/nixl/commit.txt
RUN cd /opt/nixl && \
mkdir build && \
meson setup build/ --buildtype=release --prefix=/usr/local/nixl && \
cd build/ && \
ninja && \
ninja install
RUN if [ "$ARCH" = "arm64" ]; then \
cd /opt/nixl && \
mkdir build && \
meson setup build/ --buildtype=release --prefix=/usr/local/nixl -Dgds_path=/usr/local/cuda/targets/sbsa-linux && \
cd build/ && \
ninja && \
ninja install; \
else \
cd /opt/nixl && \
mkdir build && \
meson setup build/ --buildtype=release --prefix=/usr/local/nixl && \
cd build/ && \
ninja && \
ninja install; \
fi
### NATS & ETCD SETUP ###
# nats
......@@ -152,65 +162,37 @@ ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
# Install NIXL Python module
RUN cd /opt/nixl && uv build . --out-dir /workspace/wheels/nixl
# TODO: Move gds_path selection based on arch into NIXL build
RUN if [ "$ARCH" = "arm64" ]; then \
cd /opt/nixl && uv build . --out-dir /workspace/wheels/nixl \
--config-settings=setup-args="-Dgds_path=/usr/local/cuda/targets/sbsa-linux"; \
else \
cd /opt/nixl && uv build . --out-dir /workspace/wheels/nixl; \
fi
# Install the wheel
# TODO: Move NIXL wheel install to the wheel_builder stage
RUN uv pip install /workspace/wheels/nixl/*.whl
# Install patched vllm - keep this early in Dockerfile to avoid
# Install vllm - keep this early in Dockerfile to avoid
# rebuilds from unrelated source code changes
ARG VLLM_REF="0.8.4"
ARG VLLM_PATCH="vllm_v${VLLM_REF}-dynamo-kv-disagg-patch.patch"
ARG VLLM_PATCHED_PACKAGE_NAME="ai_dynamo_vllm"
ARG VLLM_PATCHED_PACKAGE_VERSION="0.8.4.post4"
ARG VLLM_MAX_JOBS=4
ARG VLLM_REF="059d4cd"
ENV CUDA_HOME=/usr/local/cuda
RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
--mount=type=cache,target=/root/.cache/uv \
mkdir /tmp/vllm && \
uv pip install pip wheel && \
# NOTE: vLLM build from source on ARM can take several hours, see VLLM_MAX_JOBS details.
if [ "$ARCH" = "arm64" ]; then \
# PyTorch 2.7 supports CUDA 12.8 and aarch64 installs
# NIXL has a torch dependency, so need to force-reinstall to install the correct version
uv pip install torch==2.7.0 torchvision torchaudio --force-reinstall --index-url https://download.pytorch.org/whl/cu128 && \
# Download vLLM source with version matching patch
git clone --branch v${VLLM_REF} --depth 1 https://github.com/vllm-project/vllm.git /tmp/vllm/vllm-${VLLM_REF} && \
cd /tmp/vllm/vllm-${VLLM_REF}/ && \
# Patch vLLM source with dynamo additions
patch -p1 < /tmp/deps/vllm/${VLLM_PATCH} && \
# WAR: Set package version check to 'vllm' instead of 'ai_dynamo_vllm' to avoid
# platform detection issues on ARM install.
# TODO: Rename package from vllm to ai_dynamo_vllm like x86 path below to remove this WAR.
sed -i 's/version("ai_dynamo_vllm")/version("vllm")/g' vllm/platforms/__init__.py && \
# Remove pytorch from vllm install dependencies
python use_existing_torch.py && \
# Build/install vllm from source
uv pip install -r requirements/build.txt && \
# MAX_JOBS set to avoid running OOM on vllm-flash-attn build, this can
# significantly impact the overall build time. Each job can take up
# to -16GB RAM each, so tune according to available system memory.
MAX_JOBS=${VLLM_MAX_JOBS} uv pip install -vv . --no-build-isolation ; \
# Handle x86_64: Download wheel, unpack, setup for later steps
else \
python -m pip download --only-binary=:all: --no-deps --dest /tmp/vllm vllm==v${VLLM_REF} && \
# Patch vLLM pre-built download with dynamo additions
cd /tmp/vllm && \
wheel unpack *.whl && \
cd vllm-${VLLM_REF}/ && \
patch -p1 < /tmp/deps/vllm/${VLLM_PATCH} && \
# Rename the package from vllm to ai_dynamo_vllm
mv vllm-${VLLM_REF}.dist-info ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info && \
sed -i "s/^Name: vllm/Name: ${VLLM_PATCHED_PACKAGE_NAME}/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/METADATA && \
sed -i "s/^Version: ${VLLM_REF}/Version: ${VLLM_PATCHED_PACKAGE_VERSION}/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/METADATA && \
# Update wheel tag from linux_${ARCH_ALT} to manylinux1_${ARCH_ALT} in WHEEL file
sed -i "s/Tag: cp38-abi3-linux_${ARCH_ALT}/Tag: cp38-abi3-manylinux1_${ARCH_ALT}/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/WHEEL && \
# Also update the tag in RECORD file to match
sed -i "s/-cp38-abi3-linux_${ARCH_ALT}.whl/-cp38-abi3-manylinux1_${ARCH_ALT}.whl/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/RECORD && \
mkdir -p /workspace/dist && \
wheel pack . --dest-dir /workspace/dist && \
uv pip install /workspace/dist/${VLLM_PATCHED_PACKAGE_NAME}-*.whl ; \
fi
uv pip install pip cuda-python && \
mkdir /opt/vllm && \
cd /opt/vllm && \
git clone https://github.com/vllm-project/vllm.git && \
cd vllm && \
git checkout $VLLM_REF && \
VLLM_USE_PRECOMPILED=1 uv pip install -e . && \
cd tools/ep_kernels && \
bash install_python_libraries.sh && \
cd ep_kernels_workspace && \
git clone --recursive https://github.com/deepseek-ai/DeepGEMM.git && \
cd DeepGEMM && \
python setup.py install
# Common dependencies
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
......@@ -324,8 +306,6 @@ RUN SNIPPET="export PROMPT_COMMAND='history -a' && export HISTFILE=$HOME/.comman
RUN mkdir -p /home/$USERNAME/.cache/
ENV VLLM_KV_CAPI_PATH=$HOME/dynamo/.build/target/debug/libdynamo_llm_capi.so
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
##################################
......@@ -443,12 +423,7 @@ RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/la
sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
echo "cat ~/.launch_screen" >> ~/.bashrc
# Tell vllm to use the Dynamo LLM C API for KV Cache Routing
ENV VLLM_KV_CAPI_PATH=/opt/dynamo/bindings/lib/libdynamo_llm_capi.so
ARG ARCH_ALT
ENV NIXL_PLUGIN_DIR=/usr/local/nixl/lib/${ARCH_ALT}-linux-gnu/plugins
ENV LD_LIBRARY_PATH=/usr/local/nixl/lib/${ARCH_ALT}-linux-gnu:/usr/local/nixl/lib/${ARCH_ALT}-linux-gnu/plugins:/usr/local/ucx/lib:$LD_LIBRARY_PATH
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/nvidia/nvda_nixl/lib/x86_64-linux-gnu/
########################################
########## Development Image ###########
......@@ -519,16 +494,13 @@ COPY --from=base /workspace/wheels/nixl/*.whl wheelhouse/
COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/
RUN uv pip install ai-dynamo[vllm] --find-links wheelhouse && \
uv pip install nixl --find-links wheelhouse && \
ln -sf $VIRTUAL_ENV/bin/* /usr/local/bin/
# Tell vllm to use the Dynamo LLM C API for KV Cache Routing
ENV VLLM_KV_CAPI_PATH="/opt/dynamo/bindings/lib/libdynamo_llm_capi.so"
ln -sf $VIRTUAL_ENV/bin/* /usr/local/bin/ && \
rm -r wheelhouse
# Copy launch banner
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
echo "cat ~/.launch_screen" >> ~/.bashrc
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
# FIXME: NCCL will hang with 25.03, so use 25.01 for now
# Please check https://github.com/ai-dynamo/dynamo/pull/1065
# for details and reproducer to manually test if the image
# can be updated to later versions.
ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
ARG RELEASE_BUILD
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
# Define general architecture ARGs for supporting both x86 and aarch64 builds.
# ARCH: Used for package suffixes (e.g., amd64, arm64)
# ARCH_ALT: Used for Rust targets, manylinux suffix (e.g., x86_64, aarch64)
#
# Default values are for x86/amd64:
# --build-arg ARCH=amd64 --build-arg ARCH_ALT=x86_64
#
# For arm64/aarch64, build with:
# --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64
#
# NOTE: There isn't an easy way to define one of these values based on the other value
# without adding if statements everywhere, so just define both as ARGs for now.
ARG ARCH=amd64
ARG ARCH_ALT=x86_64
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS nixl_base
# Redeclare ARCH and ARCH_ALT so they're available in this stage
ARG ARCH
ARG ARCH_ALT
WORKDIR /opt/nixl
# Add a cache hint that only changes when the nixl commit changes
ARG NIXL_COMMIT
# This line acts as a cache key - it only changes when NIXL_COMMIT changes
RUN echo "NIXL commit: ${NIXL_COMMIT}" > /opt/nixl/commit.txt
# Copy the nixl source
COPY --from=nixl . .
##################################
########## Base Image ############
##################################
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base
# Redeclare ARCH and ARCH_ALT so they're available in this stage
ARG ARCH
ARG ARCH_ALT
USER root
ARG PYTHON_VERSION=3.12
RUN apt-get update -y && \
apt-get install -y \
# NIXL build dependencies
cmake \
meson \
ninja-build \
pybind11-dev \
# Rust build dependencies
clang \
libclang-dev \
git \
# Install utilities
nvtop \
tmux \
vim \
autoconf \
libtool \
net-tools
# These headers are missing with the hpcx installer, required
# by UCX to find RDMA devices
RUN apt-get update -y && \
apt-get install -y --no-install-recommends \
--reinstall libibverbs-dev rdma-core ibverbs-utils libibumad-dev \
libnuma-dev librdmacm-dev ibverbs-providers
ARG NIXL_UCX_REF=v1.19.x
WORKDIR /workspace
### UCX EFA Setup ###
RUN rm -rf /opt/hpcx/ucx
RUN rm -rf /usr/local/ucx
RUN echo "Building UCX with reference $NIXL_UCX_REF"
RUN cd /usr/local/src && \
git clone https://github.com/openucx/ucx.git && \
cd ucx && \
git checkout $NIXL_UCX_REF && \
./autogen.sh && ./configure \
--prefix=/usr/local/ucx \
--enable-shared \
--disable-static \
--disable-doxygen-doc \
--enable-optimizations \
--enable-cma \
--enable-devel-headers \
--with-cuda=/usr/local/cuda \
--with-verbs \
--with-efa \
--with-dm \
--with-gdrcopy=/usr/local \
--enable-mt && \
make -j && \
make -j install-strip && \
ldconfig
ENV LD_LIBRARY_PATH=/usr/lib:/usr/local/ucx/lib:$LD_LIBRARY_PATH
ENV CPATH=/usr/include
ENV PATH=/usr/bin:$PATH
ENV PKG_CONFIG_PATH=/usr/lib/pkgconfig
SHELL ["/bin/bash", "-c"]
WORKDIR /workspace
### NIXL SETUP ###
# Copy nixl source, and use commit hash as cache hint
COPY --from=nixl_base /opt/nixl /opt/nixl
COPY --from=nixl_base /opt/nixl/commit.txt /opt/nixl/commit.txt
RUN if [ "$ARCH" = "arm64" ]; then \
cd /opt/nixl && \
mkdir build && \
meson setup build/ --buildtype=release --prefix=/usr/local/nixl -Dgds_path=/usr/local/cuda/targets/sbsa-linux && \
cd build/ && \
ninja && \
ninja install; \
else \
cd /opt/nixl && \
mkdir build && \
meson setup build/ --buildtype=release --prefix=/usr/local/nixl && \
cd build/ && \
ninja && \
ninja install; \
fi
### NATS & ETCD SETUP ###
# nats
RUN wget --tries=3 --waitretry=5 https://github.com/nats-io/nats-server/releases/download/v2.10.28/nats-server-v2.10.28-${ARCH}.deb && \
dpkg -i nats-server-v2.10.28-${ARCH}.deb && rm nats-server-v2.10.28-${ARCH}.deb
# etcd
ENV ETCD_VERSION="v3.5.21"
RUN wget --tries=3 --waitretry=5 https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-${ARCH}.tar.gz -O /tmp/etcd.tar.gz && \
mkdir -p /usr/local/bin/etcd && \
tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1 && \
rm /tmp/etcd.tar.gz
ENV PATH=/usr/local/bin/etcd/:$PATH
### VIRTUAL ENVIRONMENT SETUP ###
# Install uv and create virtualenv
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
RUN mkdir /opt/dynamo && \
uv venv /opt/dynamo/venv --python 3.12
# Activate virtual environment
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
# Install NIXL Python module
# TODO: Move gds_path selection based on arch into NIXL build
RUN if [ "$ARCH" = "arm64" ]; then \
cd /opt/nixl && uv build . --out-dir /workspace/wheels/nixl \
--config-settings=setup-args="-Dgds_path=/usr/local/cuda/targets/sbsa-linux"; \
else \
cd /opt/nixl && uv build . --out-dir /workspace/wheels/nixl; \
fi
# Install the wheel
# TODO: Move NIXL wheel install to the wheel_builder stage
RUN uv pip install /workspace/wheels/nixl/*.whl
# Install vllm - keep this early in Dockerfile to avoid
# rebuilds from unrelated source code changes
ARG VLLM_REF="059d4cd"
ENV CUDA_HOME=/usr/local/cuda
RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
--mount=type=cache,target=/root/.cache/uv \
uv pip install pip cuda-python && \
mkdir /opt/vllm && \
cd /opt/vllm && \
git clone https://github.com/vllm-project/vllm.git && \
cd vllm && \
git checkout $VLLM_REF && \
VLLM_USE_PRECOMPILED=1 uv pip install -e . && \
cd tools/ep_kernels && \
bash install_python_libraries.sh && \
cd ep_kernels_workspace && \
git clone --recursive https://github.com/deepseek-ai/DeepGEMM.git && \
cd DeepGEMM && \
python setup.py install
# Common dependencies
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
uv pip install --requirement /tmp/requirements.txt
# Install test dependencies
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
uv pip install --requirement /tmp/requirements.txt
# ### MISC UTILITY SETUP ###
# Finish pyright install
RUN pyright --help > /dev/null 2>&1
# Enable Git operations in the /workspace directory
RUN printf "[safe]\n directory=/workspace\n" > /root/.gitconfig
# Install prometheus
ARG PROM_VERSION=3.4.1
RUN apt-get update && apt-get install -y --no-install-recommends \
curl tar ca-certificates && \
rm -rf /var/lib/apt/lists/*
RUN ARCH=$(dpkg --print-architecture) && \
case "$ARCH" in \
amd64) PLATFORM=linux-amd64 ;; \
arm64) PLATFORM=linux-arm64 ;; \
*) echo "Unsupported architecture: $ARCH" && exit 1 ;; \
esac && \
curl -fsSL https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}/prometheus-${PROM_VERSION}.${PLATFORM}.tar.gz \
| tar -xz -C /tmp && \
mv /tmp/prometheus-${PROM_VERSION}.${PLATFORM}/prometheus /usr/local/bin/ && \
chmod +x /usr/local/bin/prometheus && \
rm -rf /tmp/prometheus-${PROM_VERSION}.${PLATFORM}
### BUILDS ###
# Rust build/dev dependencies
RUN apt update -y && \
apt install --no-install-recommends -y \
build-essential \
protobuf-compiler \
cmake \
libssl-dev \
pkg-config
ENV RUSTUP_HOME=/usr/local/rustup \
CARGO_HOME=/usr/local/cargo \
PATH=/usr/local/cargo/bin:$PATH \
RUST_VERSION=1.87.0
# Define Rust target based on ARCH_ALT ARG
ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu
# Install Rust using RUSTARCH derived from ARCH_ALT
RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \
# TODO: Add SHA check back based on RUSTARCH
chmod +x rustup-init && \
./rustup-init -y --no-modify-path --profile default --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \
rm rustup-init && \
chmod -R a+w $RUSTUP_HOME $CARGO_HOME
ARG CARGO_BUILD_JOBS
# Set CARGO_BUILD_JOBS to 16 if not provided
# This is to prevent cargo from building $(nproc) jobs in parallel,
# which might exceed the number of opened files limit.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
#######################################
########## Local Development ##########
#######################################
FROM base AS local-dev
# https://code.visualstudio.com/remote/advancedcontainers/add-nonroot-user
# Will use the default ubuntu user, but give sudo access
# Needed so files permissions aren't set to root ownership when writing from inside container
# Don't want ubuntu to be editable, just change uid and gid. User ubuntu is hardcoded in .devcontainer
ENV USERNAME=ubuntu
ARG USER_UID=1000
ARG USER_GID=1000
RUN apt-get update && apt-get install -y sudo gnupg2 gnupg1 \
&& echo "$USERNAME ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/$USERNAME \
&& chmod 0440 /etc/sudoers.d/$USERNAME \
&& mkdir -p /home/$USERNAME \
&& chown -R $USERNAME:$USERNAME /home/$USERNAME \
&& rm -rf /var/lib/apt/lists/* \
&& chsh -s /bin/bash $USERNAME
# This is a slow operation (~40s on my cpu)
# Much better than chown -R $USERNAME:$USERNAME /opt/dynamo/venv (~10min on my cpu)
COPY --from=base --chown=$USER_UID:$USER_GID /opt/dynamo/venv/ /opt/dynamo/venv/
RUN chown $USERNAME:$USERNAME /opt/dynamo/venv
COPY --from=base --chown=$USERNAME:$USERNAME /usr/local/bin /usr/local/bin
# so we can use maturin develop
RUN uv pip install maturin[patchelf]
USER $USERNAME
ENV HOME=/home/$USERNAME
ENV PYTHONPATH=$HOME/dynamo/deploy/sdk/src:$PYTHONPATH:$HOME/dynamo/components/planner/src:$PYTHONPATH
ENV CARGO_TARGET_DIR=$HOME/dynamo/.build/target
WORKDIR $HOME
# https://code.visualstudio.com/remote/advancedcontainers/persist-bash-history
RUN SNIPPET="export PROMPT_COMMAND='history -a' && export HISTFILE=$HOME/.commandhistory/.bash_history" \
&& mkdir -p $HOME/.commandhistory \
&& touch $HOME/.commandhistory/.bash_history \
&& echo "$SNIPPET" >> "$HOME/.bashrc"
RUN mkdir -p /home/$USERNAME/.cache/
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
##################################
##### Wheel Build Image ##########
##################################
# Redeclare ARCH_ALT ARG so it's available for interpolation in the FROM instruction
ARG ARCH_ALT
FROM quay.io/pypa/manylinux_2_28_${ARCH_ALT} AS wheel_builder
ARG CARGO_BUILD_JOBS
# Set CARGO_BUILD_JOBS to 16 if not provided
# This is to prevent cargo from building $(nproc) jobs in parallel,
# which might exceed the number of opened files limit.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
# Use build arg RELEASE_BUILD = true to generate wheels for Python 3.10, 3.11 and 3.12.
ARG RELEASE_BUILD
WORKDIR /workspace
RUN yum update -y \
&& yum install -y llvm-toolset \
&& yum install -y python3.12-devel \
&& yum install -y protobuf-compiler \
&& yum clean all \
&& rm -rf /var/cache/yum
ENV RUSTUP_HOME=/usr/local/rustup \
CARGO_HOME=/usr/local/cargo \
CARGO_TARGET_DIR=/workspace/target \
VIRTUAL_ENV=/opt/dynamo/venv
COPY --from=base $RUSTUP_HOME $RUSTUP_HOME
COPY --from=base $CARGO_HOME $CARGO_HOME
COPY --from=base /usr/local/nixl /opt/nvidia/nvda_nixl
COPY --from=base /workspace /workspace
COPY --from=base $VIRTUAL_ENV $VIRTUAL_ENV
ENV PATH=$CARGO_HOME/bin:$VIRTUAL_ENV/bin:$PATH
# Copy configuration files
COPY pyproject.toml /workspace/
COPY README.md /workspace/
COPY LICENSE /workspace/
COPY Cargo.toml /workspace/
COPY Cargo.lock /workspace/
COPY rust-toolchain.toml /workspace/
COPY hatch_build.py /workspace/
# Copy source code
COPY lib/ /workspace/lib/
COPY components /workspace/components
COPY launch /workspace/launch
COPY deploy/sdk /workspace/deploy/sdk
RUN cargo build \
--release \
--locked \
--features dynamo-llm/block-manager \
--workspace
# Build dynamo wheel
RUN uv build --wheel --out-dir /workspace/dist && \
cd /workspace/lib/bindings/python && \
uv pip install maturin[patchelf] && \
maturin build --release --features block-manager --out /workspace/dist && \
if [ "$RELEASE_BUILD" = "true" ]; then \
# do not enable KVBM feature, ensure compatibility with lower glibc
uv run --python 3.11 maturin build --release --out /workspace/dist && \
uv run --python 3.10 maturin build --release --out /workspace/dist; \
fi
#######################################
########## CI Minimum Image ###########
#######################################
FROM base AS ci_minimum
ENV DYNAMO_HOME=/workspace
ENV CARGO_TARGET_DIR=/workspace/target
WORKDIR /workspace
COPY --from=wheel_builder /workspace /workspace
COPY --from=wheel_builder /opt/nvidia/nvda_nixl /opt/nvidia/nvda_nixl
# Copy Cargo cache to avoid re-downloading dependencies
COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME
# Copy rest of the code
COPY . /workspace
# Build C bindings, creates lib/bindings/c/include
#
# TODO: In theory the 'cargo build' in earlier stage covers this, we "just" need to copy the
# `lib/bindings/c/include` folder that build.rs generated across.
# I couldn't get that to work, hence TODO.
RUN cd /workspace/lib/bindings/c && cargo build --release --locked
# Package the bindings
RUN mkdir -p /opt/dynamo/bindings/wheels && \
mkdir /opt/dynamo/bindings/lib && \
cp dist/ai_dynamo*cp312*.whl /opt/dynamo/bindings/wheels/. && \
cp target/release/libdynamo_llm_capi.so /opt/dynamo/bindings/lib/. && \
cp -r lib/bindings/c/include /opt/dynamo/bindings/. && \
cp target/release/dynamo-run /usr/local/bin && \
cp target/release/metrics /usr/local/bin && \
cp target/release/mock_worker /usr/local/bin
RUN uv pip install /workspace/dist/ai_dynamo_runtime*cp312*.whl && \
uv pip install /workspace/dist/ai_dynamo*any.whl
RUN uv pip install /workspace/benchmarks
# Copy launch banner
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
echo "cat ~/.launch_screen" >> ~/.bashrc
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/nvidia/nvda_nixl/lib/x86_64-linux-gnu/
########################################
########## Development Image ###########
########################################
FROM ci_minimum AS dev
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
####################################
########## Runtime Image ###########
####################################
FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime
WORKDIR /workspace
ENV DYNAMO_HOME=/workspace
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
# Install build-essential and python3-dev as apt dependencies
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
build-essential \
python3-dev && \
rm -rf /var/lib/apt/lists/*
### COPY BINDINGS ###
# Copy all bindings (wheels, lib, include) from ci_minimum
COPY --from=ci_minimum /opt/dynamo/bindings /opt/dynamo/bindings
### COPY NATS & ETCD ###
# Copy nats and etcd from base image
COPY --from=base /usr/bin/nats-server /usr/bin/nats-server
COPY --from=base /usr/local/bin/etcd/ /usr/local/bin/etcd/
# Copy UCX from base image as plugin for NIXL
# Copy NIXL source from base image (required for NIXL plugins)
COPY --from=base /usr/local/ucx /usr/local/ucx
COPY --from=base /usr/local/nixl /usr/local/nixl
ARG ARCH_ALT
ENV NIXL_PLUGIN_DIR=/usr/local/nixl/lib/${ARCH_ALT}-linux-gnu/plugins
ENV LD_LIBRARY_PATH=/usr/local/nixl/lib/${ARCH_ALT}-linux-gnu:/usr/local/nixl/lib/${ARCH_ALT}-linux-gnu/plugins:/usr/local/ucx/lib:$LD_LIBRARY_PATH
# Setup the python environment
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
RUN uv venv $VIRTUAL_ENV --python 3.12 && \
echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc
# Common dependencies
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
uv pip install --requirement /tmp/requirements.txt
# Install the wheels and symlink executables to /usr/local/bin so dynamo components can use them
# Dynamo components currently do not have the VIRTUAL_ENV in their PATH, so we need to symlink the executables
#Copy NIXL and Dynamo wheels into wheelhouse
COPY --from=base /workspace/wheels/nixl/*.whl wheelhouse/
COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/
RUN uv pip install ai-dynamo --find-links wheelhouse && \
uv pip install nixl --find-links wheelhouse && \
ln -sf $VIRTUAL_ENV/bin/* /usr/local/bin/ && \
rm -r wheelhouse
# Copy launch banner
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
echo "cat ~/.launch_screen" >> ~/.bashrc
# Copy examples
COPY ./examples examples/
ENTRYPOINT [ "/usr/bin/bash" ]
CMD []
......@@ -49,7 +49,7 @@ PYTHON_PACKAGE_VERSION=${current_tag:-$latest_tag.dev+$commit_id}
# dependencies are specified in the /container/deps folder and
# installed within framework specific sections of the Dockerfile.
declare -A FRAMEWORKS=(["VLLM"]=1 ["TENSORRTLLM"]=2 ["NONE"]=3 ["SGLANG"]=4 ["VLLM_V1"]=5)
declare -A FRAMEWORKS=(["VLLM"]=1 ["TENSORRTLLM"]=2 ["NONE"]=3 ["SGLANG"]=4)
DEFAULT_FRAMEWORK=VLLM
SOURCE_DIR=$(dirname "$(readlink -f "$0")")
......@@ -111,9 +111,6 @@ NONE_BASE_IMAGE_TAG="24.04"
SGLANG_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
SGLANG_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
VLLM_V1_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
VLLM_V1_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
NIXL_COMMIT=3c47a48955e6f96bd5d4fb43a9d80bb64722f8e4
NIXL_REPO=ai-dynamo/nixl.git
......@@ -403,8 +400,6 @@ elif [[ $FRAMEWORK == "NONE" ]]; then
DOCKERFILE=${SOURCE_DIR}/Dockerfile.none
elif [[ $FRAMEWORK == "SGLANG" ]]; then
DOCKERFILE=${SOURCE_DIR}/Dockerfile.sglang
elif [[ $FRAMEWORK == "VLLM_V1" ]]; then
DOCKERFILE=${SOURCE_DIR}/Dockerfile.vllm_v1
fi
NIXL_DIR="/tmp/nixl/nixl_src"
......
<!--
SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
SPDX-License-Identifier: Apache-2.0
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
Apply this patch to Python source code from vLLM release [v0.7.2](https://github.com/vllm-project/vllm/releases/tag/v0.7.2).
\ No newline at end of file
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
# Function to print usage
print_usage() {
echo "Usage: $0 --original-ref <original_tag_or_branch> --fork-repo <fork_repo_url> --fork-ref <fork_tag_or_branch> --output <patch_output_path>"
echo
echo "Arguments:"
echo " --original-ref The tag or branch name from the original vllm-project/vllm repo"
echo " --fork-repo The URL of the forked repository"
echo " --fork-ref The tag or branch name from the forked repository"
echo " --output Path where the generated patch file should be saved"
echo
echo "Example:"
echo " $0 --original-ref v0.2.0 --fork-repo https://github.com/user/vllm.git --fork-ref feature-branch --output ./my-patch.diff"
exit 1
}
# Parse named arguments
while [[ $# -gt 0 ]]; do
case $1 in
--original-ref)
ORIGINAL_REF="$2"
shift 2
;;
--fork-repo)
FORK_REPO="$2"
shift 2
;;
--fork-ref)
FORK_REF="$2"
shift 2
;;
--output)
PATCH_OUTPUT="$2"
shift 2
;;
*)
print_usage
;;
esac
done
# Check if all required arguments are provided
if [ -z "$ORIGINAL_REF" ] || [ -z "$FORK_REPO" ] || [ -z "$FORK_REF" ] || [ -z "$PATCH_OUTPUT" ]; then
print_usage
fi
# Convert patch output path to absolute path if it's relative
if [[ ! "$PATCH_OUTPUT" = /* ]]; then
PATCH_OUTPUT="$(pwd)/${PATCH_OUTPUT}"
fi
TEMP_DIR=$(mktemp -d)
# Clean up temp directory on script exit
trap 'rm -rf "$TEMP_DIR"' EXIT
# Clone original vLLM to a temp directory
git clone https://github.com/vllm-project/vllm.git "$TEMP_DIR/original_vllm"
cd "$TEMP_DIR/original_vllm"
git remote add fork "$FORK_REPO"
git fetch fork "$FORK_REF"
git diff "$ORIGINAL_REF" fork/"$FORK_REF" > "$PATCH_OUTPUT"
echo "Patch created successfully: $PATCH_OUTPUT"
\ No newline at end of file
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pytest
try:
import vllm
except ImportError:
vllm = None # type: ignore
pytestmark = pytest.mark.pre_merge
# TODO: Consider `pytest.mark.vllm` and running tests based on environment
@pytest.mark.skipif(vllm is None, reason="Skipping vllm tests, vllm not installed")
def test_version():
# Verify that the image has the patched version of vllm
assert vllm.__version__.endswith("0.8.4") # type: ignore
......@@ -24,7 +24,7 @@ RUN_PREFIX=
# dependencies are specified in the /container/deps folder and
# installed within framework specific sections of the Dockerfile.
declare -A FRAMEWORKS=(["VLLM"]=1 ["TENSORRTLLM"]=2 ["SGLANG"]=3 ["VLLM_V1"]=4)
declare -A FRAMEWORKS=(["VLLM"]=1 ["TENSORRTLLM"]=2 ["SGLANG"]=3)
DEFAULT_FRAMEWORK=VLLM
SOURCE_DIR=$(dirname "$(readlink -f "$0")")
......
......@@ -25,7 +25,7 @@ graph TD
The dcgm-exporter service in the Docker Compose network is configured to use port 9401 instead of the default port 9400. This adjustment is made to avoid port conflicts with other dcgm-exporter instances that may be running simultaneously. Such a configuration is typical in distributed systems like SLURM.
As of Q2 2025, Dynamo HTTP Frontend metrics are exposed when you build containers with `--framework VLLM_V1` or `--framework TENSORRTLLM`.
As of Q2 2025, Dynamo HTTP Frontend metrics are exposed when you build containers with `--framework VLLM` or `--framework TENSORRTLLM`.
## Getting Started
......
......@@ -36,11 +36,11 @@ docker compose -f deploy/metrics/docker-compose.yml up -d
### Build and Run docker
```bash
./container/build.sh --framework VLLM_V1
./container/build.sh
```
```bash
./container/run.sh -it --framework VLLM_V1 [--mount-workspace]
./container/run.sh -it [--mount-workspace]
```
This includes the specific commit [vllm-project/vllm#19790](https://github.com/vllm-project/vllm/pull/19790) which enables support for external control of the DP ranks.
......@@ -129,9 +129,9 @@ For Kubernetes deployment, YAML manifests are provided in the `deploy/` director
- **Dynamo Cloud**: Follow the [Quickstart Guide](../../docs/guides/dynamo_deploy/quickstart.md) to deploy Dynamo Cloud first.
- **Container Images**: The deployment files currently require access to `nvcr.io/nvidian/nim-llm-dev/vllm_v1-runtime`. If you don't have access, build and push your own image:
- **Container Images**: The deployment files currently require access to `nvcr.io/nvidian/nim-llm-dev/vllm-runtime`. If you don't have access, build and push your own image:
```bash
./container/build.sh --framework VLLM_V1
./container/build.sh --framework VLLM
# Tag and push to your container registry
# Update the image references in the YAML files
```
......
......@@ -186,7 +186,7 @@ class VLLMProcess(ManagedProcess):
vllm_configs = {
"aggregated": VLLMConfig(
name="aggregated",
directory="/workspace/examples/llm",
directory="/workspace/examples/vllm",
script_name="agg.sh",
marks=[pytest.mark.gpu_1, pytest.mark.vllm],
endpoints=["v1/chat/completions", "v1/completions"],
......@@ -199,7 +199,7 @@ vllm_configs = {
),
"disaggregated": VLLMConfig(
name="disaggregated",
directory="/workspace/examples/llm",
directory="/workspace/examples/vllm",
script_name="disagg.sh",
marks=[pytest.mark.gpu_2, pytest.mark.vllm],
endpoints=["v1/chat/completions", "v1/completions"],
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment