Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
b738e6a0
Unverified
Commit
b738e6a0
authored
Aug 29, 2025
by
Ziqi Fan
Committed by
GitHub
Aug 29, 2025
Browse files
chore: rm not needed kvbm Dockerfile (#2789)
parent
295964e3
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
2 additions
and
501 deletions
+2
-501
container/Dockerfile.kvbm
container/Dockerfile.kvbm
+0
-497
container/build.sh
container/build.sh
+1
-3
container/run.sh
container/run.sh
+1
-1
No files found.
container/Dockerfile.kvbm
deleted
100644 → 0
View file @
295964e3
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
# FIXME: NCCL will hang with 25.03, so use 25.01 for now
# Please check https://github.com/ai-dynamo/dynamo/pull/1065
# for details and reproducer to manually test if the image
# can be updated to later versions.
ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
ARG RELEASE_BUILD
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
# Define general architecture ARGs for supporting both x86 and aarch64 builds.
# ARCH: Used for package suffixes (e.g., amd64, arm64)
# ARCH_ALT: Used for Rust targets, manylinux suffix (e.g., x86_64, aarch64)
#
# Default values are for x86/amd64:
# --build-arg ARCH=amd64 --build-arg ARCH_ALT=x86_64
#
# For arm64/aarch64, build with:
# --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64
#
# NOTE: There isn't an easy way to define one of these values based on the other value
# without adding if statements everywhere, so just define both as ARGs for now.
ARG ARCH=amd64
ARG ARCH_ALT=x86_64
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS nixl_base
# Redeclare ARCH and ARCH_ALT so they're available in this stage
ARG ARCH
ARG ARCH_ALT
WORKDIR /opt/nixl
# Add a cache hint that only changes when the nixl commit changes
ARG NIXL_COMMIT
# This line acts as a cache key - it only changes when NIXL_COMMIT changes
RUN echo "NIXL commit: ${NIXL_COMMIT}" > /opt/nixl/commit.txt
# Copy the nixl source
COPY --from=nixl . .
##################################
########## Base Image ############
##################################
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base
# Redeclare ARCH and ARCH_ALT so they're available in this stage
ARG ARCH
ARG ARCH_ALT
USER root
ARG PYTHON_VERSION=3.12
RUN apt-get update -y && \
apt-get install -y \
# NIXL build dependencies
cmake \
meson \
ninja-build \
pybind11-dev \
# Rust build dependencies
clang \
libclang-dev \
git \
# Install utilities
nvtop \
tmux \
vim \
autoconf \
libtool
# These headers are missing with the hpcx installer, required
# by UCX to find RDMA devices
RUN apt-get update -y && \
apt-get install -y --no-install-recommends \
--reinstall libibverbs-dev rdma-core ibverbs-utils libibumad-dev \
libnuma-dev librdmacm-dev ibverbs-providers
ARG NIXL_UCX_REF=v1.19.0
WORKDIR /workspace
### UCX EFA Setup ###
RUN rm -rf /opt/hpcx/ucx
RUN rm -rf /usr/local/ucx
RUN echo "Building UCX with reference $NIXL_UCX_REF"
RUN cd /usr/local/src && \
git clone https://github.com/openucx/ucx.git && \
cd ucx && \
git checkout $NIXL_UCX_REF && \
./autogen.sh && ./configure \
--prefix=/usr/local/ucx \
--enable-shared \
--disable-static \
--disable-doxygen-doc \
--enable-optimizations \
--enable-cma \
--enable-devel-headers \
--with-cuda=/usr/local/cuda \
--with-verbs \
--with-efa \
--with-dm \
--with-gdrcopy=/usr/local \
--enable-mt && \
make -j && \
make -j install-strip && \
ldconfig
ENV LD_LIBRARY_PATH=/usr/lib:/usr/local/ucx/lib:$LD_LIBRARY_PATH
ENV CPATH=/usr/include
ENV PATH=/usr/bin:$PATH
ENV PKG_CONFIG_PATH=/usr/lib/pkgconfig
SHELL ["/bin/bash", "-c"]
WORKDIR /workspace
### NIXL SETUP ###
# Copy nixl source, and use commit hash as cache hint
COPY --from=nixl_base /opt/nixl /opt/nixl
COPY --from=nixl_base /opt/nixl/commit.txt /opt/nixl/commit.txt
RUN cd /opt/nixl && \
mkdir build && \
meson setup build/ --buildtype=release --prefix=/usr/local/nixl && \
cd build/ && \
ninja && \
ninja install
### NATS & ETCD SETUP ###
# nats
RUN wget --tries=3 --waitretry=5 https://github.com/nats-io/nats-server/releases/download/v2.10.24/nats-server-v2.10.24-${ARCH}.deb && \
dpkg -i nats-server-v2.10.24-${ARCH}.deb && rm nats-server-v2.10.24-${ARCH}.deb
# etcd
ENV ETCD_VERSION="v3.5.18"
RUN wget --tries=3 --waitretry=5 https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-${ARCH}.tar.gz -O /tmp/etcd.tar.gz && \
mkdir -p /usr/local/bin/etcd && \
tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1 && \
rm /tmp/etcd.tar.gz
ENV PATH=/usr/local/bin/etcd/:$PATH
### VIRTUAL ENVIRONMENT SETUP ###
# Install uv and create virtualenv
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
RUN mkdir /opt/dynamo && \
uv venv /opt/dynamo/venv --python 3.12
# Activate virtual environment
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
# Install NIXL Python module
RUN cd /opt/nixl && uv build . --out-dir /workspace/wheels/nixl
# Install the wheel
# TODO: Move NIXL wheel install to the wheel_builder stage
RUN uv pip install /workspace/wheels/nixl/*.whl
# Install forked vLLM with KVBM integration
ARG VLLM_REF="dynamo/stage-1"
ENV CUDA_HOME=/usr/local/cuda
RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
--mount=type=cache,target=/root/.cache/uv \
uv pip install pip cuda-python && \
mkdir /opt/vllm && \
cd /opt/vllm && \
git clone https://github.com/ryanolson/vllm.git && \
cd vllm && \
git checkout $VLLM_REF && \
VLLM_USE_PRECOMPILED=1 uv pip install -e .
# Common dependencies
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
uv pip install --requirement /tmp/requirements.txt
# Install test dependencies
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
uv pip install --requirement /tmp/requirements.txt
# ### MISC UTILITY SETUP ###
# Finish pyright install
RUN pyright --help > /dev/null 2>&1
# Enable Git operations in the /workspace directory
RUN printf "[safe]\n directory=/workspace\n" > /root/.gitconfig
RUN ln -sf /bin/bash /bin/sh
# Install prometheus
ARG PROM_VERSION=3.4.1
RUN apt-get update && apt-get install -y --no-install-recommends \
curl tar ca-certificates && \
rm -rf /var/lib/apt/lists/*
RUN ARCH=$(dpkg --print-architecture) && \
case "$ARCH" in \
amd64) PLATFORM=linux-amd64 ;; \
arm64) PLATFORM=linux-arm64 ;; \
*) echo "Unsupported architecture: $ARCH" && exit 1 ;; \
esac && \
curl -fsSL https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}/prometheus-${PROM_VERSION}.${PLATFORM}.tar.gz \
| tar -xz -C /tmp && \
mv /tmp/prometheus-${PROM_VERSION}.${PLATFORM}/prometheus /usr/local/bin/ && \
chmod +x /usr/local/bin/prometheus && \
rm -rf /tmp/prometheus-${PROM_VERSION}.${PLATFORM}
### BUILDS ###
# Rust build/dev dependencies
RUN apt update -y && \
apt install --no-install-recommends -y \
build-essential \
protobuf-compiler \
cmake \
libssl-dev \
pkg-config
ENV RUSTUP_HOME=/usr/local/rustup \
CARGO_HOME=/usr/local/cargo \
PATH=/usr/local/cargo/bin:$PATH \
RUST_VERSION=1.89.0
# Define Rust target based on ARCH_ALT ARG
ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu
# Install Rust using RUSTARCH derived from ARCH_ALT
RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \
# TODO: Add SHA check back based on RUSTARCH
chmod +x rustup-init && \
./rustup-init -y --no-modify-path --profile default --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \
rm rustup-init && \
chmod -R a+w $RUSTUP_HOME $CARGO_HOME
ARG CARGO_BUILD_JOBS
# Set CARGO_BUILD_JOBS to 16 if not provided
# This is to prevent cargo from building $(nproc) jobs in parallel,
# which might exceed the number of opened files limit.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
#######################################
########## Local Development ##########
#######################################
FROM base AS local-dev
# https://code.visualstudio.com/remote/advancedcontainers/add-nonroot-user
# Will use the default ubuntu user, but give sudo access
# Needed so files permissions aren't set to root ownership when writing from inside container
# Don't want ubuntu to be editable, just change uid and gid. User ubuntu is hardcoded in .devcontainer
ENV USERNAME=ubuntu
ARG USER_UID=1000
ARG USER_GID=1000
RUN apt-get update && apt-get install -y sudo gnupg2 gnupg1 \
&& echo "$USERNAME ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/$USERNAME \
&& chmod 0440 /etc/sudoers.d/$USERNAME \
&& mkdir -p /home/$USERNAME \
&& chown -R $USERNAME:$USERNAME /home/$USERNAME \
&& rm -rf /var/lib/apt/lists/* \
&& chsh -s /bin/bash $USERNAME
# This is a slow operation (~40s on my cpu)
# Much better than chown -R $USERNAME:$USERNAME /opt/dynamo/venv (~10min on my cpu)
COPY --from=base --chown=$USER_UID:$USER_GID /opt/dynamo/venv/ /opt/dynamo/venv/
RUN chown $USERNAME:$USERNAME /opt/dynamo/venv
COPY --from=base --chown=$USERNAME:$USERNAME /usr/local/bin /usr/local/bin
# so we can use maturin develop
RUN uv pip install maturin[patchelf]
USER $USERNAME
ENV HOME=/home/$USERNAME
ENV PYTHONPATH=$HOME/dynamo/deploy/sdk/src:$PYTHONPATH:$HOME/dynamo/components/planner/src:$PYTHONPATH
ENV CARGO_TARGET_DIR=$HOME/dynamo/.build/target
WORKDIR $HOME
# https://code.visualstudio.com/remote/advancedcontainers/persist-bash-history
RUN SNIPPET="export PROMPT_COMMAND='history -a' && export HISTFILE=$HOME/.commandhistory/.bash_history" \
&& mkdir -p $HOME/.commandhistory \
&& touch $HOME/.commandhistory/.bash_history \
&& echo "$SNIPPET" >> "$HOME/.bashrc"
RUN mkdir -p /home/$USERNAME/.cache/
ENV VLLM_KV_CAPI_PATH=$HOME/dynamo/.build/target/debug/libdynamo_llm_capi.so
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
##################################
##### Wheel Build Image ##########
##################################
# Redeclare ARCH_ALT ARG so it's available for interpolation in the FROM instruction
ARG ARCH_ALT
FROM quay.io/pypa/manylinux_2_28_${ARCH_ALT} AS wheel_builder
ARG CARGO_BUILD_JOBS
# Set CARGO_BUILD_JOBS to 16 if not provided
# This is to prevent cargo from building $(nproc) jobs in parallel,
# which might exceed the number of opened files limit.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
# Use build arg RELEASE_BUILD = true to generate wheels for Python 3.10, 3.11 and 3.12.
ARG RELEASE_BUILD
WORKDIR /workspace
RUN yum update -y \
&& yum install -y llvm-toolset \
&& yum install -y python3.12-devel \
&& yum install -y protobuf-compiler \
&& yum clean all \
&& rm -rf /var/cache/yum
ENV RUSTUP_HOME=/usr/local/rustup \
CARGO_HOME=/usr/local/cargo \
CARGO_TARGET_DIR=/workspace/target \
VIRTUAL_ENV=/opt/dynamo/venv
COPY --from=base $RUSTUP_HOME $RUSTUP_HOME
COPY --from=base $CARGO_HOME $CARGO_HOME
COPY --from=base /usr/local/nixl /opt/nvidia/nvda_nixl
COPY --from=base /workspace /workspace
COPY --from=base $VIRTUAL_ENV $VIRTUAL_ENV
ENV PATH=$CARGO_HOME/bin:$VIRTUAL_ENV/bin:$PATH
# Copy configuration files
COPY pyproject.toml /workspace/
COPY README.md /workspace/
COPY LICENSE /workspace/
COPY Cargo.toml /workspace/
COPY Cargo.lock /workspace/
COPY rust-toolchain.toml /workspace/
COPY hatch_build.py /workspace/
# Copy source code
COPY lib/ /workspace/lib/
COPY components /workspace/components
COPY launch /workspace/launch
COPY deploy/sdk /workspace/deploy/sdk
RUN cargo build \
--release \
--locked \
--features dynamo-llm/block-manager \
--workspace
# Build dynamo wheel
RUN uv build --wheel --out-dir /workspace/dist && \
cd /workspace/lib/bindings/python && \
uv pip install maturin[patchelf] && \
maturin build --release --features block-manager --out /workspace/dist && \
if [ "$RELEASE_BUILD" = "true" ]; then \
# do not enable KVBM feature, ensure compatibility with lower glibc
uv run --python 3.11 maturin build --release --out /workspace/dist && \
uv run --python 3.10 maturin build --release --out /workspace/dist; \
fi
#######################################
########## CI Minimum Image ###########
#######################################
FROM base AS ci_minimum
ENV DYNAMO_HOME=/workspace
ENV CARGO_TARGET_DIR=/workspace/target
WORKDIR /workspace
COPY --from=wheel_builder /workspace /workspace
COPY --from=wheel_builder /opt/nvidia/nvda_nixl /opt/nvidia/nvda_nixl
# Copy Cargo cache to avoid re-downloading dependencies
COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME
# Copy rest of the code
COPY . /workspace
# Build C bindings, creates lib/bindings/c/include
#
# TODO: In theory the 'cargo build' in earlier stage covers this, we "just" need to copy the
# `lib/bindings/c/include` folder that build.rs generated across.
# I couldn't get that to work, hence TODO.
RUN cd /workspace/lib/bindings/c && cargo build --release --locked
# Package the bindings
RUN mkdir -p /opt/dynamo/bindings/wheels && \
mkdir /opt/dynamo/bindings/lib && \
cp dist/ai_dynamo*cp312*.whl /opt/dynamo/bindings/wheels/. && \
cp target/release/libdynamo_llm_capi.so /opt/dynamo/bindings/lib/. && \
cp -r lib/bindings/c/include /opt/dynamo/bindings/. && \
cp target/release/dynamo-run /usr/local/bin && \
cp target/release/http /usr/local/bin && \
cp target/release/llmctl /usr/local/bin && \
cp target/release/metrics /usr/local/bin && \
cp target/release/mock_worker /usr/local/bin
RUN uv pip install /workspace/dist/ai_dynamo_runtime*cp312*.whl && \
uv pip install /workspace/dist/ai_dynamo*any.whl
RUN uv pip install /workspace/benchmarks
# Copy launch banner
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
echo "cat ~/.launch_screen" >> ~/.bashrc
# Tell vllm to use the Dynamo LLM C API for KV Cache Routing
ENV VLLM_KV_CAPI_PATH=/opt/dynamo/bindings/lib/libdynamo_llm_capi.so
ARG ARCH_ALT
ENV NIXL_PLUGIN_DIR=/usr/local/nixl/lib/${ARCH_ALT}-linux-gnu/plugins
ENV LD_LIBRARY_PATH=/usr/local/nixl/lib/${ARCH_ALT}-linux-gnu:/usr/local/nixl/lib/${ARCH_ALT}-linux-gnu/plugins:/usr/local/ucx/lib:$LD_LIBRARY_PATH
########################################
########## Development Image ###########
########################################
FROM ci_minimum AS dev
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
####################################
########## Runtime Image ###########
####################################
FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime
WORKDIR /workspace
ENV DYNAMO_HOME=/workspace
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
# Install build-essential and python3-dev as apt dependencies
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
build-essential \
python3-dev && \
rm -rf /var/lib/apt/lists/*
### COPY BINDINGS ###
# Copy all bindings (wheels, lib, include) from ci_minimum
COPY --from=ci_minimum /opt/dynamo/bindings /opt/dynamo/bindings
### COPY NATS & ETCD ###
# Copy nats and etcd from base image
COPY --from=base /usr/bin/nats-server /usr/bin/nats-server
COPY --from=base /usr/local/bin/etcd/ /usr/local/bin/etcd/
ENV PATH=/usr/local/bin/etcd/:$PATH
# Copy UCX from base image as plugin for NIXL
# Copy NIXL source from base image (required for NIXL plugins)
COPY --from=base /usr/local/ucx /usr/local/ucx
COPY --from=base /usr/local/nixl /usr/local/nixl
ARG ARCH_ALT
ENV NIXL_PLUGIN_DIR=/usr/local/nixl/lib/${ARCH_ALT}-linux-gnu/plugins
ENV LD_LIBRARY_PATH=/usr/local/nixl/lib/${ARCH_ALT}-linux-gnu:/usr/local/nixl/lib/${ARCH_ALT}-linux-gnu/plugins:/usr/local/ucx/lib:$LD_LIBRARY_PATH
# Setup the python environment
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
RUN uv venv $VIRTUAL_ENV --python 3.12 && \
echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc
# Common dependencies
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
uv pip install --requirement /tmp/requirements.txt
# Install test dependencies
#TODO: Remove this once we have a functional ci_minimum image built on top of the runtime image
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
uv pip install --requirement /tmp/requirements.txt
#TODO: Remove this once we have a functional ci_minimum image built on top of the runtime image
COPY . /workspace
RUN uv pip install /workspace/benchmarks
# Install the wheels and symlink executables to /usr/local/bin so dynamo components can use them
# Dynamo components currently do not have the VIRTUAL_ENV in their PATH, so we need to symlink the executables
#Copy NIXL and Dynamo wheels into wheelhouse
COPY --from=base /workspace/wheels/nixl/*.whl wheelhouse/
COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/
RUN uv pip install ai-dynamo[vllm] --find-links wheelhouse && \
uv pip install nixl --find-links wheelhouse && \
ln -sf $VIRTUAL_ENV/bin/* /usr/local/bin/
# Tell vllm to use the Dynamo LLM C API for KV Cache Routing
ENV VLLM_KV_CAPI_PATH="/opt/dynamo/bindings/lib/libdynamo_llm_capi.so"
# Copy launch banner
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
echo "cat ~/.launch_screen" >> ~/.bashrc
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
container/build.sh
View file @
b738e6a0
...
...
@@ -49,7 +49,7 @@ PYTHON_PACKAGE_VERSION=${current_tag:-$latest_tag.dev+$commit_id}
# dependencies are specified in the /container/deps folder and
# installed within framework specific sections of the Dockerfile.
declare
-A
FRAMEWORKS
=([
"VLLM"
]=
1
[
"TRTLLM"
]=
2
[
"NONE"
]=
3
[
"SGLANG"
]=
4
[
"KVBM"
]=
5
)
declare
-A
FRAMEWORKS
=([
"VLLM"
]=
1
[
"TRTLLM"
]=
2
[
"NONE"
]=
3
[
"SGLANG"
]=
4
)
DEFAULT_FRAMEWORK
=
VLLM
...
...
@@ -460,8 +460,6 @@ elif [[ $FRAMEWORK == "NONE" ]]; then
DOCKERFILE
=
${
SOURCE_DIR
}
/Dockerfile
elif
[[
$FRAMEWORK
==
"SGLANG"
]]
;
then
DOCKERFILE
=
${
SOURCE_DIR
}
/Dockerfile.sglang
elif
[[
$FRAMEWORK
==
"KVBM"
]]
;
then
DOCKERFILE
=
${
SOURCE_DIR
}
/Dockerfile.kvbm
fi
# Add NIXL_REF as a build argument
...
...
container/run.sh
View file @
b738e6a0
...
...
@@ -24,7 +24,7 @@ RUN_PREFIX=
# dependencies are specified in the /container/deps folder and
# installed within framework specific sections of the Dockerfile.
declare
-A
FRAMEWORKS
=([
"VLLM"
]=
1
[
"TRTLLM"
]=
2
[
"NONE"
]=
3
[
"SGLANG"
]=
4
[
"KVBM"
]=
5
)
declare
-A
FRAMEWORKS
=([
"VLLM"
]=
1
[
"TRTLLM"
]=
2
[
"NONE"
]=
3
[
"SGLANG"
]=
4
)
DEFAULT_FRAMEWORK
=
VLLM
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment