Unverified commit 82bae247, authored by Anant Sharma, committed by GitHub
Browse files

ci: add support for vllm sanity testing on Github (#2526)


Signed-off-by: Anant Sharma <anants@nvidia.com>
Co-authored-by: Tushar Sharma <tusharma@nvidia.com>
parent e3619ce0
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# Builds the framework container image on a GPU runner and runs the
# marker-selected pytest suite inside it.
name: NVIDIA Github Validation

on:
  push:
    branches:
      - main
      - "pull-request/[0-9]+"

jobs:
  build-test:
    runs-on: gpu-l40-runners
    strategy:
      matrix:
        framework: [vllm]
        # Per-framework settings: image build target and pytest marker expression.
        include:
          - framework: vllm
            target: runtime
            pytest_marks: "e2e and vllm and gpu_1 and not slow"
    # Do not cancel main branch runs
    concurrency:
      group: ${{ matrix.framework }}-build-test-${{ github.ref_name || github.run_id }}
      cancel-in-progress: ${{ github.ref_name != 'main' }}
    name: Build and Test - ${{ matrix.framework }}
    env:
      # Container name prefix built from run id, attempt, job and framework.
      CONTAINER_ID: test_${{ github.run_id }}_${{ github.run_attempt }}_${{ github.job }}_${{ matrix.framework }}
      PYTEST_XML_FILE: pytest_test_report.xml
      FRAMEWORK: ${{ matrix.framework }}
      TARGET: ${{ matrix.target }}
      PYTEST_MARKS: ${{ matrix.pytest_marks }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to NGC
        if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name == 'push'
        env:
          # Hand the token to the shell through the environment instead of
          # expanding it into the script text, so special characters in the
          # token cannot break or alter the command line.
          NGC_CI_ACCESS_TOKEN: ${{ secrets.NGC_CI_ACCESS_TOKEN }}
        run: |
          # '$oauthtoken' is a literal username; the single quotes keep the
          # shell from expanding it.
          echo "$NGC_CI_ACCESS_TOKEN" | docker login nvcr.io -u '$oauthtoken' --password-stdin

      # Removes all unused Docker data on the runner before the image build.
      - name: Cleanup
        if: always()
        run: |
          docker system prune -af

      # Prints NVIDIA kernel module, kernel log and nvidia-smi output.
      - name: Debug
        run: |
          lsmod | grep nvidia
          sudo dmesg | grep -i nvrm || true
          nvidia-smi

      - name: Build image
        env:
          GITHUB_TOKEN: ${{ secrets.CI_TOKEN }}
          AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
          SCCACHE_S3_BUCKET: ${{ secrets.SCCACHE_S3_BUCKET }}
        run: |
          ./container/build.sh --tag ${{ matrix.framework }}:latest \
            --target ${{ matrix.target }} \
            --framework ${{ matrix.framework }} \
            --use-sccache \
            --sccache-bucket "$SCCACHE_S3_BUCKET" \
            --sccache-region "$AWS_DEFAULT_REGION"

      # NOTE(review): the container is started with --rm and no volume mount,
      # so the JUnit XML written by --junitxml appears to be discarded when the
      # container exits; confirm whether the report should be preserved.
      - name: Run pytest
        run: |
          docker run --rm --gpus all -w /workspace \
            --name ${{ env.CONTAINER_ID }}_pytest \
            ${{ matrix.framework }}:latest \
            bash -c "pytest -xsv --basetemp=/tmp --junitxml=${{ env.PYTEST_XML_FILE }} -m \"${{ env.PYTEST_MARKS }}\""
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: NVIDIA Test Github Validation
name: NVIDIA Github Validation
on:
push:
......@@ -21,6 +9,11 @@ on:
- main
pull_request:
# Do not cancel main branch runs
concurrency:
group: dynamo-build-test-${{ github.ref_name || github.run_id }}
cancel-in-progress: ${{ github.ref_name != 'main' }}
jobs:
build-test:
runs-on:
......@@ -53,7 +46,14 @@ jobs:
docker compose up -d nats-server etcd-server
- name: Run Rust checks (block-manager + integration tests)
run: |
docker run -v ${{ github.workspace }}:/workspace -w /workspace/lib/llm --name ${{ env.CONTAINER_ID }}_rust_checks ${{ steps.define_image_tag.outputs.image_tag }} bash -ec 'rustup component add rustfmt clippy && cargo fmt -- --check && cargo clippy --features block-manager --no-deps --all-targets -- -D warnings && cargo test --locked --all-targets --features=block-manager && cargo test --locked --features integration -- --nocapture'
docker run --rm -v ${{ github.workspace }}:/workspace -w /workspace/lib/llm \
--name ${{ env.CONTAINER_ID }}_rust_checks \
${{ steps.define_image_tag.outputs.image_tag }} \
bash -ec 'rustup component add rustfmt clippy && \
cargo fmt -- --check && \
cargo clippy --features block-manager --no-deps --all-targets -- -D warnings && \
cargo test --locked --all-targets --features=block-manager && \
cargo test --locked --features integration -- --nocapture'
- name: Cleanup services
if: always()
working-directory: ./deploy
......@@ -63,7 +63,10 @@ jobs:
env:
PYTEST_MARKS: "pre_merge or mypy"
run: |
docker run -v ${{ github.workspace }}:/workspace -w /workspace --name ${{ env.CONTAINER_ID }}_pytest ${{ steps.define_image_tag.outputs.image_tag }} bash -c "pytest --basetemp=/tmp --junitxml=${{ env.PYTEST_XML_FILE }} -m \"${{ env.PYTEST_MARKS }}\""
docker run -v ${{ github.workspace }}:/workspace -w /workspace \
--name ${{ env.CONTAINER_ID }}_pytest \
${{ steps.define_image_tag.outputs.image_tag }} \
bash -c "pytest --basetemp=/tmp --junitxml=${{ env.PYTEST_XML_FILE }} -m \"${{ env.PYTEST_MARKS }}\""
- name: Copy test report from test Container
if: always()
run: |
......
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
##################################
########## Build Arguments ########
##################################
# Base image configuration
ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
# TODO OPS-612: NCCL will hang with 25.03, so use 25.01 for now
# Please check https://github.com/ai-dynamo/dynamo/pull/1065
# for details and reproducer to manually test if the image
# can be updated to later versions.
ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
# Build configuration
ARG RELEASE_BUILD=false
ARG ENABLE_KVBM=false
ARG CARGO_BUILD_JOBS
# Define general architecture ARGs for supporting both x86 and aarch64 builds.
# ARCH: Used for package suffixes (e.g., amd64, arm64)
......@@ -23,6 +31,17 @@ ARG ENABLE_KVBM=false
ARG ARCH=amd64
ARG ARCH_ALT=x86_64
# SCCACHE configuration
ARG USE_SCCACHE
ARG SCCACHE_BUCKET=""
ARG SCCACHE_REGION=""
# NIXL configuration
ARG NIXL_UCX_REF=v1.19.0
ARG NIXL_REF=0.4.1
# Python configuration
ARG PYTHON_VERSION=3.12
##################################
########## Base Image ############
......@@ -30,44 +49,66 @@ ARG ARCH_ALT=x86_64
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base
# Redeclare ARCH and ARCH_ALT so they're available in this stage
# Redeclare ARGs for this stage
ARG ARCH
ARG ARCH_ALT
ARG CARGO_BUILD_JOBS
ARG NIXL_UCX_REF=v1.19.0
ARG NIXL_REF=0.4.1
# Environment variables for NIXL
ENV NIXL_SRC_DIR=/opt/nixl \
NIXL_PREFIX=/opt/nvidia/nvda_nixl \
NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu \
NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu/plugins
ARG PYTHON_VERSION
ARG USE_SCCACHE
ARG SCCACHE_BUCKET
ARG SCCACHE_REGION
ARG NIXL_UCX_REF
ARG NIXL_REF
USER root
ARG PYTHON_VERSION=3.12
WORKDIR /opt/dynamo
##################################
########## Tool Installation #####
##################################
# Install uv package manager
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
# Install SCCACHE if requested
COPY container/use-sccache.sh /tmp/use-sccache.sh
RUN if [ "$USE_SCCACHE" = "true" ]; then \
/tmp/use-sccache.sh install; \
fi
# Set SCCACHE environment variables
ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \
SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}} \
SCCACHE_S3_KEY_PREFIX=${USE_SCCACHE:+${ARCH}} \
RUSTC_WRAPPER=${USE_SCCACHE:+sccache} \
CMAKE_C_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache} \
CMAKE_CXX_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache} \
CMAKE_CUDA_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache}
##################################
########## Rust Setup ############
##################################
# Rust environment setup
ENV RUSTUP_HOME=/usr/local/rustup \
CARGO_HOME=/usr/local/cargo \
PATH=/usr/local/cargo/bin:$PATH \
RUST_VERSION=1.89.0
WORKDIR /opt/dynamo
# Define Rust target based on ARCH_ALT ARG
ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu
# Install Rust using RUSTARCH derived from ARCH_ALT
# Install Rust
RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \
# TODO OPS-591: Add SHA check back based on RUSTARCH
chmod +x rustup-init && \
./rustup-init -y --no-modify-path --profile minimal --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \
rm rustup-init && \
chmod -R a+w $RUSTUP_HOME $CARGO_HOME
##################################
########## System Dependencies ###
##################################
# Install system packages
RUN apt-get update -y \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
# NIXL build dependencies
......@@ -96,12 +137,17 @@ RUN apt-get update -y \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
# Download external dependencies in parallel for better performance
##################################
########## External Services #####
##################################
# Install NATS server
ENV NATS_VERSION="v2.10.28"
RUN --mount=type=cache,target=/var/cache/apt \
wget --tries=3 --waitretry=5 https://github.com/nats-io/nats-server/releases/download/${NATS_VERSION}/nats-server-${NATS_VERSION}-${ARCH}.deb && \
dpkg -i nats-server-${NATS_VERSION}-${ARCH}.deb && rm nats-server-${NATS_VERSION}-${ARCH}.deb
# Install etcd
ENV ETCD_VERSION="v3.5.21"
RUN wget --tries=3 --waitretry=5 https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-${ARCH}.tar.gz -O /tmp/etcd.tar.gz && \
mkdir -p /usr/local/bin/etcd && \
......@@ -109,13 +155,21 @@ RUN wget --tries=3 --waitretry=5 https://github.com/etcd-io/etcd/releases/downlo
rm /tmp/etcd.tar.gz
ENV PATH=/usr/local/bin/etcd/:$PATH
### UCX EFA Setup ###
##################################
########## UCX Build #############
##################################
# Build and install UCX
RUN rm -rf /opt/hpcx/ucx && \
rm -rf /usr/local/ucx && \
echo "Building UCX with reference $NIXL_UCX_REF" && \
cd /usr/local/src && \
git clone https://github.com/openucx/ucx.git && \
cd ucx && git checkout $NIXL_UCX_REF && \
CC=${USE_SCCACHE:+sccache gcc} && \
CXX=${USE_SCCACHE:+sccache g++} && \
export CC=${CC} && \
export CXX=${CXX} && \
./autogen.sh && \
./configure \
--prefix=/usr/local/ucx \
......@@ -133,6 +187,7 @@ RUN rm -rf /opt/hpcx/ucx && \
--enable-mt && \
make -j$(nproc) && \
make -j$(nproc) install-strip && \
/tmp/use-sccache.sh show-stats "UCX" && \
echo "/usr/local/ucx/lib" > /etc/ld.so.conf.d/ucx.conf && \
echo "/usr/local/ucx/lib/ucx" >> /etc/ld.so.conf.d/ucx.conf && \
ldconfig && \
......@@ -144,8 +199,17 @@ ENV CPATH=/usr/include:$CPATH \
PATH=/usr/bin:$PATH \
PKG_CONFIG_PATH=/usr/lib/pkgconfig:$PKG_CONFIG_PATH
### NIXL SETUP ###
# Clone nixl source with shallow clone for faster download
##################################
########## NIXL Setup ############
##################################
# NIXL environment setup
ENV NIXL_SRC_DIR=/opt/nixl \
NIXL_PREFIX=/opt/nvidia/nvda_nixl \
NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu \
NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu/plugins
# Build and install NIXL
RUN git clone --depth 1 --branch ${NIXL_REF} "https://github.com/ai-dynamo/nixl.git" ${NIXL_SRC_DIR} && \
cd ${NIXL_SRC_DIR} && \
if [ "$ARCH" = "arm64" ]; then \
......@@ -154,13 +218,13 @@ RUN git clone --depth 1 --branch ${NIXL_REF} "https://github.com/ai-dynamo/nixl.
nixl_build_args=""; \
fi && \
meson setup build/ --buildtype=release --prefix=$NIXL_PREFIX $nixl_build_args && \
ninja -C build/ -j$(nproc) && \
ninja -C build/ install && \
ninja -C build/ -j$(nproc) && ninja -C build/ install && \
/tmp/use-sccache.sh show-stats "NIXL" && \
echo "$NIXL_LIB_DIR" > /etc/ld.so.conf.d/nixl.conf && \
echo "$NIXL_PLUGIN_DIR" >> /etc/ld.so.conf.d/nixl.conf && \
ldconfig
# Install NIXL Python module
# Build NIXL Python module
# TODO OPS-590: Move gds_path selection based on arch into NIXL build and re-enable gds backend for arm64
RUN if [ "$ARCH" = "arm64" ]; then \
cd ${NIXL_SRC_DIR} && uv build . --out-dir /opt/dynamo/wheelhouse/nixl \
......@@ -169,11 +233,15 @@ RUN if [ "$ARCH" = "arm64" ]; then \
cd ${NIXL_SRC_DIR} && uv build . --out-dir /opt/dynamo/wheelhouse/nixl; \
fi
# Create virtual environment
##################################
########## Python Environment ####
##################################
# Create and activate virtual environment
ARG PYTHON_VERSION
RUN mkdir -p /opt/dynamo/venv && \
uv venv /opt/dynamo/venv --python 3.12
uv venv /opt/dynamo/venv --python $PYTHON_VERSION
# Activate virtual environment
ENV VIRTUAL_ENV=/opt/dynamo/venv \
PATH="/opt/dynamo/venv/bin:${PATH}"
......@@ -191,43 +259,58 @@ ARG ARCH_ALT
FROM quay.io/pypa/manylinux_2_28_${ARCH_ALT} AS wheel_builder
# Redeclare ARGs for this stage
ARG ARCH
ARG ARCH_ALT
ARG CARGO_BUILD_JOBS
# Set CARGO_BUILD_JOBS to 16 if not provided
# This is to prevent cargo from building $(nproc) jobs in parallel,
# which might exceed the number of opened files limit.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
# Use build arg RELEASE_BUILD = true to generate wheels for Python 3.10, 3.11 and 3.12.
ARG RELEASE_BUILD
# Use arg ENABLE_KVBM = true to turn on the block-manager feature
ARG ENABLE_KVBM
ARG USE_SCCACHE
ARG SCCACHE_BUCKET
ARG SCCACHE_REGION
WORKDIR /opt/dynamo
RUN dnf update -y \
&& dnf install -y llvm-toolset protobuf-compiler python3.12-devel \
&& dnf clean all \
&& rm -rf /var/cache/dnf
ENV RUSTUP_HOME=/usr/local/rustup \
# Set environment variables
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16} \
RUSTUP_HOME=/usr/local/rustup \
CARGO_HOME=/usr/local/cargo \
CARGO_TARGET_DIR=/opt/dynamo/target \
VIRTUAL_ENV=/opt/dynamo/venv \
NIXL_PREFIX=/opt/nvidia/nvda_nixl
NIXL_PREFIX=/opt/nvidia/nvda_nixl \
PATH=/usr/local/cargo/bin:/opt/dynamo/venv/bin:$PATH
# Install system dependencies
ARG PYTHON_VERSION
RUN dnf update -y \
&& dnf install -y llvm-toolset protobuf-compiler python${PYTHON_VERSION}-devel wget \
&& dnf clean all \
&& rm -rf /var/cache/dnf
# Copy artifacts from base stage
COPY --from=base $RUSTUP_HOME $RUSTUP_HOME
COPY --from=base $CARGO_HOME $CARGO_HOME
COPY --from=base $NIXL_PREFIX $NIXL_PREFIX
COPY --from=base $VIRTUAL_ENV $VIRTUAL_ENV
ENV PATH=$CARGO_HOME/bin:$VIRTUAL_ENV/bin:$PATH
# Copy configuration files first for better layer caching
COPY pyproject.toml README.md LICENSE Cargo.toml Cargo.lock rust-toolchain.toml hatch_build.py /opt/dynamo/
# Install SCCACHE if requested
COPY container/use-sccache.sh /tmp/use-sccache.sh
RUN if [ "$USE_SCCACHE" = "true" ]; then \
/tmp/use-sccache.sh install; \
fi
# Set SCCACHE environment variables
ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \
SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}} \
SCCACHE_S3_KEY_PREFIX=${USE_SCCACHE:+${ARCH}} \
RUSTC_WRAPPER=${USE_SCCACHE:+sccache}
# Copy source code
# Copy source code (order matters for layer caching)
COPY pyproject.toml README.md LICENSE Cargo.toml Cargo.lock rust-toolchain.toml hatch_build.py /opt/dynamo/
COPY lib/ /opt/dynamo/lib/
COPY components/ /opt/dynamo/components/
# Build dynamo wheel
# Build wheels
RUN uv build --wheel --out-dir /opt/dynamo/dist && \
cd /opt/dynamo/lib/bindings/python && \
uv pip install maturin[patchelf] && \
......@@ -237,14 +320,15 @@ RUN uv build --wheel --out-dir /opt/dynamo/dist && \
maturin build --release --out /opt/dynamo/dist; \
fi && \
if [ "$RELEASE_BUILD" = "true" ]; then \
# do not enable KVBM feature, ensure compatibility with lower glibc
uv run --python 3.11 maturin build --release --out /opt/dynamo/dist && \
uv run --python 3.10 maturin build --release --out /opt/dynamo/dist; \
fi
fi && \
/tmp/use-sccache.sh show-stats "Dynamo"
##############################################
########## Dev entrypoint image ##############
##############################################
FROM base AS dev
# Application environment variables
......@@ -254,16 +338,13 @@ ENV DYNAMO_HOME=/opt/dynamo \
WORKDIR /opt/dynamo
# Copy built artifacts
COPY --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
COPY --from=wheel_builder $CARGO_TARGET_DIR $CARGO_TARGET_DIR
# Copy Cargo cache to avoid re-downloading dependencies
COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME
# Temporarily copy benchmarks folder for installation
# Install Python packages
COPY benchmarks/ /opt/dynamo/benchmarks/
# Install all python packages
RUN uv pip install \
/opt/dynamo/wheelhouse/ai_dynamo_runtime*cp312*.whl \
/opt/dynamo/wheelhouse/ai_dynamo*any.whl \
......@@ -271,10 +352,10 @@ RUN uv pip install \
/opt/dynamo/benchmarks && \
rm -rf /opt/dynamo/benchmarks
# Copy launch banner
# Setup launch banner
RUN --mount=type=bind,source=./container/launch_message.txt,target=/opt/dynamo/launch_message.txt \
sed '/^#\s/d' /opt/dynamo/launch_message.txt > ~/.launch_screen && \
echo "cat ~/.launch_screen" >> ~/.bashrc
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
CMD []
\ No newline at end of file
......@@ -2,7 +2,7 @@
# SPDX-License-Identifier: Apache-2.0
ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
# FIXME: NCCL will hang with 25.03, so use 25.01 for now
# TODO OPS-612: NCCL will hang with 25.03, so use 25.01 for now
# Please check https://github.com/ai-dynamo/dynamo/pull/1065
# for details and reproducer to manually test if the image
# can be updated to later versions.
......@@ -16,6 +16,11 @@ ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
ARG VLLM_REF="1da94e673c257373280026f75ceb4effac80e892" # from v0.10.1.1
ARG TORCH_BACKEND="cu128"
# sccache configuration - inherit from base build
ARG USE_SCCACHE
ARG SCCACHE_BUCKET=""
ARG SCCACHE_REGION=""
# Match 0.10.1.1 vLLM release
# https://github.com/vllm-project/vllm/releases/tag/v0.10.1.1
# Pinned to commit before https://github.com/deepseek-ai/DeepGEMM/pull/112 for DeepGEMM which seems to break on H100:
......@@ -37,187 +42,172 @@ ARG FLASHINF_REF="v0.2.11"
# without adding if statements everywhere, so just define both as ARGs for now.
ARG ARCH=amd64
ARG ARCH_ALT=x86_64
##################################
########## Base Image ############
##################################
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base
# Redeclare ARCH, ARCH_ALT, TORCH_BACKEND so they're available in this stage
ARG ARCH
ARG ARCH_ALT
ARG TORCH_BACKEND
USER root
# Python configuration
ARG PYTHON_VERSION=3.12
RUN apt-get update -y && \
apt-get install -y --no-install-recommends \
# NIXL build dependencies
cmake \
meson \
ninja-build \
pybind11-dev \
# These headers are missing with the hpcx installer, required
# by UCX to find RDMA devices
libibverbs-dev rdma-core ibverbs-utils libibumad-dev \
libnuma-dev librdmacm-dev ibverbs-providers \
# Rust build dependencies
clang \
libclang-dev \
git \
build-essential \
protobuf-compiler \
libssl-dev \
pkg-config \
# Install utilities
nvtop \
tmux \
vim \
autoconf \
automake \
libtool \
net-tools \
# For Prometheus
curl tar ca-certificates && \
rm -rf /var/lib/apt/lists/*
ARG NIXL_UCX_REF=v1.19.0
ARG NIXL_REF=0.4.1
ARG DYNAMO_BASE_IMAGE="dynamo:latest-none"
FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base
ENV NIXL_SRC_DIR=/opt/nixl
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ARG ARCH_ALT
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
ENV LD_LIBRARY_PATH=$NIXL_LIB_DIR:$NIXL_PLUGIN_DIR:$LD_LIBRARY_PATH
WORKDIR /workspace
### UCX EFA Setup ###
RUN rm -rf /opt/hpcx/ucx && \
rm -rf /usr/local/ucx && \
echo "Building UCX with reference $NIXL_UCX_REF" && \
cd /usr/local/src && \
git clone https://github.com/openucx/ucx.git && \
cd ucx && \
git checkout $NIXL_UCX_REF && \
./autogen.sh && ./configure \
--prefix=/usr/local/ucx \
--enable-shared \
--disable-static \
--disable-doxygen-doc \
--enable-optimizations \
--enable-cma \
--enable-devel-headers \
--with-cuda=/usr/local/cuda \
--with-verbs \
--with-efa \
--with-dm \
--with-gdrcopy=/usr/local \
--enable-mt && \
make -j && \
make -j install-strip && \
ldconfig
ENV LD_LIBRARY_PATH=\
/usr/lib:/usr/local/ucx/lib:\
/usr/local/ucx/lib/ucx:\
$LD_LIBRARY_PATH
ENV CPATH=/usr/include
ENV PATH=/usr/bin:$PATH
ENV PKG_CONFIG_PATH=/usr/lib/pkgconfig
SHELL ["/bin/bash", "-c"]
WORKDIR /workspace
### NIXL SETUP ###
# Clone nixl source
# TEMP: disable gds backend for arm64
RUN git clone "https://github.com/ai-dynamo/nixl.git" ${NIXL_SRC_DIR} && \
cd ${NIXL_SRC_DIR} && \
git checkout ${NIXL_REF} && \
if [ "$ARCH" = "arm64" ]; then \
nixl_build_args="-Ddisable_gds_backend=true"; \
else \
nixl_build_args=""; \
fi && \
mkdir build && \
meson setup build/ --buildtype=release --prefix=$NIXL_PREFIX $nixl_build_args && \
cd build/ && \
ninja && \
ninja install;
### NATS & ETCD SETUP ###
ENV ETCD_VERSION="v3.5.21"
RUN wget --tries=3 --waitretry=5 https://github.com/nats-io/nats-server/releases/download/v2.10.28/nats-server-v2.10.28-${ARCH}.deb && \
dpkg -i nats-server-v2.10.28-${ARCH}.deb && rm nats-server-v2.10.28-${ARCH}.deb && \
wget --tries=3 --waitretry=5 https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-${ARCH}.tar.gz -O /tmp/etcd.tar.gz && \
mkdir -p /usr/local/bin/etcd && \
tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1 && \
rm /tmp/etcd.tar.gz
ENV PATH=/usr/local/bin/etcd/:$PATH
########################################################
########## Framework Development Image ################
########################################################
#
# PURPOSE: Framework development and vLLM compilation
#
# This stage builds and compiles framework dependencies including:
# - vLLM inference engine with CUDA support
# - DeepGEMM and FlashInfer optimizations
# - All necessary build tools and compilation dependencies
# - Framework-level Python packages and extensions
#
# Use this stage when you need to:
# - Build vLLM from source with custom modifications
# - Develop or debug framework-level components
# - Create custom builds with specific optimization flags
#
# Use dynamo base image (see /container/Dockerfile for more details)
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS framework
RUN apt-get update -y \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
# vLLM build dependencies
cmake \
ibverbs-providers \
ibverbs-utils \
libibumad-dev \
libibverbs-dev \
libnuma-dev \
librdmacm-dev \
rdma-core \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
### VIRTUAL ENVIRONMENT SETUP ###
# Install uv and create virtualenv
ENV VIRTUAL_ENV=/opt/dynamo/venv
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
RUN mkdir /opt/dynamo && \
uv venv ${VIRTUAL_ENV} --python 3.12
ARG PYTHON_VERSION
# Create virtual environment
RUN mkdir -p /opt/dynamo/venv && \
uv venv /opt/dynamo/venv --python $PYTHON_VERSION
# Activate virtual environment
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
# Install NIXL Python module
# TODO: Move gds_path selection based on arch into NIXL build
# TEMP: disable gds backend for arm64
RUN if [ "$ARCH" = "arm64" ]; then \
cd ${NIXL_SRC_DIR} && uv build . --out-dir /workspace/wheels/nixl \
--config-settings=setup-args="-Ddisable_gds_backend=true"; \
else \
cd ${NIXL_SRC_DIR} && uv build . --out-dir /workspace/wheels/nixl; \
fi && \
# Install the wheel
# TODO: Move NIXL wheel install to the wheel_builder stage
uv pip install /workspace/wheels/nixl/*.whl
ENV VIRTUAL_ENV=/opt/dynamo/venv \
PATH="/opt/dynamo/venv/bin:${PATH}"
ARG ARCH
# Install vllm - keep this early in Dockerfile to avoid
# rebuilds from unrelated source code changes
ARG VLLM_REF
ARG VLLM_GIT_URL
ARG DEEPGEMM_REF
ARG FLASHINF_REF
ARG TORCH_BACKEND
ARG MAX_JOBS=16
ENV MAX_JOBS=$MAX_JOBS
ENV CUDA_HOME=/usr/local/cuda
# Install sccache if requested
COPY container/use-sccache.sh /tmp/use-sccache.sh
# Install sccache if requested
ARG USE_SCCACHE
ARG ARCH_ALT
ARG SCCACHE_BUCKET
ARG SCCACHE_REGION
ENV ARCH_ALT=${ARCH_ALT}
RUN if [ "$USE_SCCACHE" = "true" ]; then \
/tmp/use-sccache.sh install; \
fi
# Set environment variables - they'll be empty strings if USE_SCCACHE=false
ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \
SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}} \
SCCACHE_S3_KEY_PREFIX=${USE_SCCACHE:+${ARCH}} \
CMAKE_C_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache} \
CMAKE_CXX_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache} \
CMAKE_CUDA_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache}
# Install VLLM and related dependencies
RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
--mount=type=cache,target=/root/.cache/uv \
# TODO - split vllm, DeepEP, DeepGeMM, PPLX installs
# Should be able to select how you want your build to go
cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \
chmod +x /tmp/install_vllm.sh && \
/tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt --deepgemm-ref $DEEPGEMM_REF --flashinf-ref $FLASHINF_REF --torch-backend $TORCH_BACKEND;
/tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt --deepgemm-ref $DEEPGEMM_REF --flashinf-ref $FLASHINF_REF --torch-backend $TORCH_BACKEND && \
/tmp/use-sccache.sh show-stats "vLLM";
ENV LD_LIBRARY_PATH=\
/opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\
$LD_LIBRARY_PATH
# Common dependencies
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
uv pip install --requirement /tmp/requirements.txt
### MISC UTILITY SETUP ###
##################################################
########## Runtime Image ########################
##################################################
#
# PURPOSE: Production runtime environment
#
# This stage creates a lightweight production-ready image containing:
# - Pre-compiled vLLM and framework dependencies
# - Dynamo runtime libraries and Python packages
# - Essential runtime dependencies and configurations
# - Optimized for inference workloads and deployment
#
# Use this stage when you need:
# - Production deployment of Dynamo with vLLM
# - Minimal runtime footprint without build tools
# - Ready-to-run inference server environment
# - Base for custom application containers
#
FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime
# Install test dependencies
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
uv pip install --requirement /tmp/requirements.txt && \
pyright --help > /dev/null 2>&1 && \
printf "[safe]\n directory=/workspace\n" > /root/.gitconfig
WORKDIR /workspace
ENV DYNAMO_HOME=/opt/dynamo
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
ARG ARCH_ALT
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
# Install Python, build-essential and python3-dev as apt dependencies
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
# Python runtime - CRITICAL for virtual environment to work
python3-dev \
build-essential \
# jq and curl for polling various endpoints and health checks
jq \
curl \
# Libraries required by UCX to find RDMA devices
libibverbs1 rdma-core ibverbs-utils libibumad3 \
libnuma1 librdmacm1 ibverbs-providers \
# JIT Kernel Compilation, flashinfer
ninja-build \
g++ \
# prometheus dependencies
ca-certificates && \
rm -rf /var/lib/apt/lists/*
# Copy CUDA development tools (nvcc, headers, dependencies, etc.) from base devel image
COPY --from=framework /usr/local/cuda/bin/nvcc /usr/local/cuda/bin/nvcc
COPY --from=framework /usr/local/cuda/bin/cudafe++ /usr/local/cuda/bin/cudafe++
COPY --from=framework /usr/local/cuda/bin/ptxas /usr/local/cuda/bin/ptxas
COPY --from=framework /usr/local/cuda/bin/fatbinary /usr/local/cuda/bin/fatbinary
COPY --from=framework /usr/local/cuda/include/ /usr/local/cuda/include/
COPY --from=framework /usr/local/cuda/nvvm /usr/local/cuda/nvvm
COPY --from=framework /usr/local/cuda/lib64/libcudart.so* /usr/local/cuda/lib64/
### COPY NATS & ETCD ###
# Copy nats and etcd from dev image
COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
# Add ETCD and CUDA binaries to PATH so cicc and other CUDA tools are accessible
ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH
# Install prometheus
ARG PROM_VERSION=3.4.1
......@@ -227,41 +217,122 @@ RUN ARCH=$(dpkg --print-architecture) && \
arm64) PLATFORM=linux-arm64 ;; \
*) echo "Unsupported architecture: $ARCH" && exit 1 ;; \
esac && \
curl -fsSL https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}/prometheus-${PROM_VERSION}.${PLATFORM}.tar.gz \
curl -fsSL "https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}/prometheus-${PROM_VERSION}.${PLATFORM}.tar.gz" \
| tar -xz -C /tmp && \
mv /tmp/prometheus-${PROM_VERSION}.${PLATFORM}/prometheus /usr/local/bin/ && \
mv "/tmp/prometheus-${PROM_VERSION}.${PLATFORM}/prometheus" /usr/local/bin/ && \
chmod +x /usr/local/bin/prometheus && \
rm -rf /tmp/prometheus-${PROM_VERSION}.${PLATFORM}
rm -rf "/tmp/prometheus-${PROM_VERSION}.${PLATFORM}"
### BUILDS ###
# Copy UCX from dev image as plugin for NIXL
# Copy NIXL source from dev image
# Copy dynamo wheels for gitlab artifacts
COPY --from=dynamo_base /usr/local/ucx /usr/local/ucx
COPY --from=dynamo_base $NIXL_PREFIX $NIXL_PREFIX
ENV RUSTUP_HOME=/usr/local/rustup \
CARGO_HOME=/usr/local/cargo \
PATH=/usr/local/cargo/bin:$PATH \
RUST_VERSION=1.89.0
# Define Rust target based on ARCH_ALT ARG
ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu
# Install Rust using RUSTARCH derived from ARCH_ALT
RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \
# TODO: Add SHA check back based on RUSTARCH
chmod +x rustup-init && \
./rustup-init -y --no-modify-path --profile default --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \
rm rustup-init && \
chmod -R a+w $RUSTUP_HOME $CARGO_HOME
ARG CARGO_BUILD_JOBS
# Set CARGO_BUILD_JOBS to 16 if not provided
# This is to prevent cargo from building $(nproc) jobs in parallel,
# which might exceed the number of opened files limit.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
# Copies vllm, DeepEP, DeepGEMM, PPLX repos (all editable installs) and nvshmem binaries
COPY --from=framework /opt/vllm /opt/vllm
ENV LD_LIBRARY_PATH=\
/opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\
$NIXL_LIB_DIR:\
$NIXL_PLUGIN_DIR:\
/usr/local/ucx/lib:\
/usr/local/ucx/lib/ucx:\
$LD_LIBRARY_PATH
### VIRTUAL ENVIRONMENT SETUP ###
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
ARG PYTHON_VERSION
RUN uv venv ${VIRTUAL_ENV} --python $PYTHON_VERSION
# Copy virtual environment from framework image to avoid re-installing framework + vllm dependencies
COPY --from=framework \
/opt/dynamo/venv/lib/python${PYTHON_VERSION}/site-packages \
/opt/dynamo/venv/lib/python${PYTHON_VERSION}/site-packages
# Install dynamo, NIXL, and dynamo-specific dependencies
COPY benchmarks/ /opt/dynamo/benchmarks/
COPY --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
RUN uv pip install \
/opt/dynamo/wheelhouse/ai_dynamo_runtime*cp312*.whl \
/opt/dynamo/wheelhouse/ai_dynamo*any.whl \
/opt/dynamo/wheelhouse/nixl/nixl*.whl \
/opt/dynamo/benchmarks && \
rm -rf /opt/dynamo/benchmarks
# Install common and test dependencies
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
--mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
uv pip install --requirement /tmp/requirements.txt --requirement /tmp/requirements.test.txt
# Copy benchmarks, examples, and tests for CI
COPY . /workspace/
# Copy attribution files
COPY ATTRIBUTION* LICENSE /workspace/
# Copy launch banner
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
echo "cat ~/.launch_screen" >> ~/.bashrc && \
echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
#######################################
########## Local Development ##########
########## Local Development #######
#######################################
#
# PURPOSE: Local development
#
# This stage adds development tools, utilities, and dependencies specifically
# needed for:
# - Local development and debugging
# - vscode/cursor development
#
# Use this stage when you need a full development environment with additional
# tooling beyond the base runtime image.
FROM base AS local-dev
FROM runtime AS local-dev
# Install utilities
RUN apt-get update -y && \
apt-get install -y --no-install-recommends \
# Install utilities
nvtop \
wget \
tmux \
vim \
autoconf \
automake \
libtool \
net-tools \
git \
# Build Dependencies
autoconf \
automake \
cmake \
git \
libtool \
meson \
net-tools \
ninja-build \
pybind11-dev \
# Rust build dependencies
clang \
libclang-dev \
protobuf-compiler && \
rm -rf /var/lib/apt/lists/*
# Rust environment setup
ENV RUSTUP_HOME=/usr/local/rustup \
CARGO_HOME=/usr/local/cargo \
CARGO_TARGET_DIR=/opt/dynamo/target \
PATH=/usr/local/cargo/bin:$PATH
COPY --from=dynamo_base $RUSTUP_HOME $RUSTUP_HOME
COPY --from=dynamo_base $CARGO_HOME $CARGO_HOME
# https://code.visualstudio.com/remote/advancedcontainers/add-nonroot-user
# Will use the default ubuntu user, but give sudo access
......@@ -282,9 +353,9 @@ RUN apt-get update && apt-get install -y sudo gnupg2 gnupg1 \
# This is a slow operation (~40s on my cpu)
# Much better than chown -R $USERNAME:$USERNAME /opt/dynamo/venv (~10min on my cpu)
COPY --from=base --chown=$USER_UID:$USER_GID ${VIRTUAL_ENV} ${VIRTUAL_ENV}
COPY --from=runtime --chown=$USER_UID:$USER_GID ${VIRTUAL_ENV} ${VIRTUAL_ENV}
RUN chown $USERNAME:$USERNAME ${VIRTUAL_ENV}
COPY --from=base --chown=$USERNAME:$USERNAME /usr/local/bin /usr/local/bin
COPY --from=runtime --chown=$USERNAME:$USERNAME /usr/local/bin /usr/local/bin
# so we can use maturin develop
RUN uv pip install maturin[patchelf]
......@@ -303,208 +374,5 @@ RUN SNIPPET="export PROMPT_COMMAND='history -a' && export HISTFILE=$HOME/.comman
RUN mkdir -p /home/$USERNAME/.cache/
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
##################################
##### Wheel Build Image ##########
##################################
#
# PURPOSE: Build the Rust workspace and the dynamo Python wheels on a
# manylinux_2_28 image. The wheels are written to /workspace/dist.
#
# Redeclare ARCH_ALT ARG so it's available for interpolation in the FROM instruction
ARG ARCH_ALT
FROM quay.io/pypa/manylinux_2_28_${ARCH_ALT} AS wheel_builder
ARG CARGO_BUILD_JOBS
# Set CARGO_BUILD_JOBS to 16 if not provided
# This is to prevent cargo from building $(nproc) jobs in parallel,
# which might exceed the number of opened files limit.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
# Use build arg RELEASE_BUILD = true to generate wheels for Python 3.10, 3.11 and 3.12.
ARG RELEASE_BUILD
# Use arg ENABLE_KVBM = true to turn on the block-manager feature
ARG ENABLE_KVBM
# Keep in sync with the base image.
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
WORKDIR /workspace
# System build prerequisites; the yum cache is removed in the same layer to keep it small.
RUN yum update -y \
&& yum install -y llvm-toolset \
&& yum install -y python3.12-devel \
&& yum install -y protobuf-compiler \
&& yum clean all \
&& rm -rf /var/cache/yum
ENV RUSTUP_HOME=/usr/local/rustup \
CARGO_HOME=/usr/local/cargo \
CARGO_TARGET_DIR=/workspace/target \
VIRTUAL_ENV=/opt/dynamo/venv
# Reuse the Rust toolchain, cargo home, NIXL install, workspace and virtual
# environment from the base stage instead of installing them again here.
COPY --from=base $RUSTUP_HOME $RUSTUP_HOME
COPY --from=base $CARGO_HOME $CARGO_HOME
COPY --from=base $NIXL_PREFIX $NIXL_PREFIX
COPY --from=base /workspace /workspace
COPY --from=base $VIRTUAL_ENV $VIRTUAL_ENV
ENV PATH=$CARGO_HOME/bin:$VIRTUAL_ENV/bin:$PATH
# Copy configuration files
COPY pyproject.toml /workspace/
COPY README.md /workspace/
COPY LICENSE /workspace/
COPY Cargo.toml /workspace/
COPY Cargo.lock /workspace/
COPY rust-toolchain.toml /workspace/
COPY hatch_build.py /workspace/
# Copy source code
COPY lib/ /workspace/lib/
COPY components /workspace/components
COPY launch /workspace/launch
# Release build of the whole cargo workspace. --locked makes the build fail if
# Cargo.lock would need to change. NOTE(review): the dynamo-llm/block-manager
# feature is enabled here unconditionally, independent of ENABLE_KVBM.
RUN cargo build \
--release \
--locked \
--features dynamo-llm/block-manager \
--workspace
# Build dynamo wheel
# 1. `uv build` produces the wheel for the project rooted at /workspace.
# 2. maturin builds the Python bindings wheel from lib/bindings/python, with
#    the block-manager feature only when ENABLE_KVBM is "true".
# 3. When RELEASE_BUILD is "true", additional bindings wheels are built with
#    Python 3.11 and 3.10 (without the block-manager feature).
RUN uv build --wheel --out-dir /workspace/dist && \
cd /workspace/lib/bindings/python && \
uv pip install maturin[patchelf] && \
if [ "$ENABLE_KVBM" = "true" ]; then \
maturin build --release --features block-manager --out /workspace/dist; \
else \
maturin build --release --out /workspace/dist; \
fi && \
if [ "$RELEASE_BUILD" = "true" ]; then \
# do not enable KVBM feature, ensure compatibility with lower glibc
uv run --python 3.11 maturin build --release --out /workspace/dist && \
uv run --python 3.10 maturin build --release --out /workspace/dist; \
fi
#######################################
########## CI Minimum Image ###########
#######################################
#
# PURPOSE: Base image plus the build outputs of wheel_builder, the full
# source tree, and the dynamo wheels installed with uv.
FROM base AS ci_minimum
ENV DYNAMO_HOME=/workspace
ENV CARGO_TARGET_DIR=/workspace/target
WORKDIR /workspace
# Bring over the built workspace (including dist/ and target/) and the NIXL install.
# NOTE(review): $NIXL_PREFIX and $CARGO_HOME are not set in this stage; presumably
# they are inherited from the base stage's ENV - confirm.
COPY --from=wheel_builder /workspace /workspace
COPY --from=wheel_builder $NIXL_PREFIX $NIXL_PREFIX
# Copy Cargo cache to avoid re-downloading dependencies
COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME
# Copy rest of the code
COPY . /workspace
# Package the bindings
# Also installs the `metrics` release binary into /usr/local/bin.
RUN mkdir -p /opt/dynamo/bindings/wheels && \
mkdir /opt/dynamo/bindings/lib && \
cp dist/ai_dynamo*cp312*.whl /opt/dynamo/bindings/wheels/. && \
cp target/release/metrics /usr/local/bin
# Install the cp312 runtime wheel first, then the pure-Python (any) wheel.
RUN uv pip install /workspace/dist/ai_dynamo_runtime*cp312*.whl && \
uv pip install /workspace/dist/ai_dynamo*any.whl
# Install the benchmarks package from the source tree.
RUN uv pip install /workspace/benchmarks
# Copy launch banner
# Lines starting with "# " are stripped from the banner; the result is printed
# by every new bash session via ~/.bashrc.
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
echo "cat ~/.launch_screen" >> ~/.bashrc
########################################
########## Development Image ###########
########################################
#
# PURPOSE: Same filesystem as ci_minimum; this stage only sets the NVIDIA
# entrypoint script and an empty default command.
FROM ci_minimum AS dev
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
####################################
########## Runtime Image ###########
####################################
#
# PURPOSE: Runtime image assembled on top of ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG}
# by copying binaries, libraries, the virtual environment and the workspace
# from the base, wheel_builder and ci_minimum stages.
# NOTE(review): RUNTIME_IMAGE and RUNTIME_IMAGE_TAG must be declared as global
# ARGs before the first FROM of the file - not visible here, confirm.
FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime
WORKDIR /workspace
ENV DYNAMO_HOME=/workspace
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
# ARCH_ALT is needed to locate the architecture-specific NIXL library directory.
ARG ARCH_ALT
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
# Install build-essential and python3-dev as apt dependencies
# The apt package lists are removed in the same layer to keep it small.
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
build-essential \
python3-dev \
# jq and curl for polling various endpoints and health checks
jq \
curl \
# For debugging
vim \
# Libraries required by UCX to find RDMA devices
libibverbs1 rdma-core ibverbs-utils libibumad3 \
libnuma1 librdmacm1 ibverbs-providers \
# JIT Kernel Compilation, flashinfer
ninja-build \
g++ \
cuda-toolkit-12-8 && \
rm -rf /var/lib/apt/lists/*
### COPY NATS & ETCD & PROMETHEUS ###
# Copy nats and etcd from base image
COPY --from=base /usr/bin/nats-server /usr/bin/nats-server
COPY --from=base /usr/local/bin/etcd/ /usr/local/bin/etcd/
ENV PATH=/usr/local/bin/etcd/:$PATH
# Copy prometheus from base image
COPY --from=base /usr/local/bin/prometheus /usr/local/bin/prometheus
# Copy UCX from base image as plugin for NIXL
# Copy NIXL source from wheel_builder image
# Copy dynamo wheels for gitlab artifacts
# (the three comments above describe the three COPY lines below, in order;
# the wheels land in wheelhouse/ relative to WORKDIR, i.e. /workspace/wheelhouse/)
COPY --from=base /usr/local/ucx /usr/local/ucx
COPY --from=wheel_builder $NIXL_PREFIX $NIXL_PREFIX
COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/
# Copies vllm, DeepEP, DeepGEMM, PPLX repos (all editable installs) and nvshmem binaries
COPY --from=base /opt/vllm /opt/vllm
# Prepend the nvshmem, NIXL and UCX library directories to the loader search path.
ENV LD_LIBRARY_PATH=\
/opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\
$NIXL_LIB_DIR:\
$NIXL_PLUGIN_DIR:\
/usr/local/ucx/lib:\
/usr/local/ucx/lib/ucx:\
$LD_LIBRARY_PATH
# Copy entire venv
# Theres a lot of stuff we'd have to re-compile (for arm64)
# TODO: use pip ai-dynamo[vllm] in venv to replicate end user environment
# Copy metrics binary from wheel_builder image, not part of ai-dynamo wheel
# NOTE(review): the COPY below actually reads the binary from the ci_minimum
# stage (which received /workspace from wheel_builder).
COPY --from=ci_minimum /workspace/target/release/metrics /usr/local/bin/metrics
COPY --from=ci_minimum ${VIRTUAL_ENV} ${VIRTUAL_ENV}
# Keep everything from ci_minimum for mypy and other pre-merge tests
# TODO: Remove this once we have a functional CI image built on top of the runtime image
COPY --from=ci_minimum /workspace/ /workspace/
# Copy launch banner
# Lines starting with "# " are stripped from the banner; new bash sessions
# print it and activate the virtual environment via ~/.bashrc.
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
echo "cat ~/.launch_screen" >> ~/.bashrc && \
echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
......@@ -121,6 +121,11 @@ NIXL_UCX_EFA_REF=9d2b88a1f67faf9876f267658bd077b379b8bb76
NO_CACHE=""
# sccache configuration for S3
USE_SCCACHE=""
SCCACHE_BUCKET=""
SCCACHE_REGION=""
get_options() {
while :; do
case $1 in
......@@ -282,9 +287,25 @@ get_options() {
--make-efa)
NIXL_UCX_REF=$NIXL_UCX_EFA_REF
;;
--)
shift
break
--use-sccache)
USE_SCCACHE=true
;;
--sccache-bucket)
if [ "$2" ]; then
SCCACHE_BUCKET=$2
shift
else
missing_requirement "$1"
fi
;;
--sccache-region)
if [ "$2" ]; then
SCCACHE_REGION=$2
shift
else
missing_requirement "$1"
fi
;;
-?*)
error 'ERROR: Unknown option: ' "$1"
......@@ -345,6 +366,16 @@ get_options() {
else
TARGET_STR="--target dev"
fi
# Validate sccache configuration
if [ "$USE_SCCACHE" = true ]; then
if [ -z "$SCCACHE_BUCKET" ]; then
error "ERROR: --sccache-bucket is required when --use-sccache is specified"
fi
if [ -z "$SCCACHE_REGION" ]; then
error "ERROR: --sccache-region is required when --use-sccache is specified"
fi
fi
}
......@@ -360,6 +391,15 @@ show_image_options() {
echo " Build Context: '${BUILD_CONTEXT}'"
echo " Build Arguments: '${BUILD_ARGS}'"
echo " Framework: '${FRAMEWORK}'"
if [ "$USE_SCCACHE" = true ]; then
echo " sccache: Enabled"
echo " sccache Bucket: '${SCCACHE_BUCKET}'"
echo " sccache Region: '${SCCACHE_REGION}'"
if [ -n "$SCCACHE_S3_KEY_PREFIX" ]; then
echo " sccache S3 Key Prefix: '${SCCACHE_S3_KEY_PREFIX}'"
fi
fi
echo ""
}
......@@ -386,6 +426,9 @@ show_help() {
echo " [--make-efa Enables EFA support for NIXL]"
echo " [--enable-kvbm Enables KVBM support in Python 3.12]"
echo " [--trtllm-use-nixl-kvcache-experimental Enables NIXL KVCACHE experimental support for TensorRT-LLM]"
echo " [--use-sccache enable sccache for Rust/C/C++ compilation caching]"
echo " [--sccache-bucket S3 bucket name for sccache (required with --use-sccache)]"
echo " [--sccache-region S3 region for sccache (required with --use-sccache)]"
exit 0
}
......@@ -400,6 +443,7 @@ error() {
get_options "$@"
# Automatically set ARCH and ARCH_ALT if PLATFORM is linux/arm64
ARCH="amd64"
if [[ "$PLATFORM" == *"linux/arm64"* ]]; then
......@@ -547,6 +591,15 @@ if [ -n "${NIXL_UCX_REF}" ]; then
BUILD_ARGS+=" --build-arg NIXL_UCX_REF=${NIXL_UCX_REF} "
fi
# Forward the sccache settings to docker as build arguments.
# Nothing is appended unless --use-sccache was given (USE_SCCACHE=true).
if [ "$USE_SCCACHE" = true ]; then
    BUILD_ARGS+=" --build-arg USE_SCCACHE=true --build-arg SCCACHE_BUCKET=${SCCACHE_BUCKET} --build-arg SCCACHE_REGION=${SCCACHE_REGION}"
fi
LATEST_TAG="--tag dynamo:latest-${FRAMEWORK,,}"
if [ -n "${TARGET}" ]; then
LATEST_TAG="${LATEST_TAG}-${TARGET}"
......@@ -558,6 +611,24 @@ if [ -z "$RUN_PREFIX" ]; then
set -x
fi
$RUN_PREFIX docker build -f $DOCKERFILE $TARGET_STR $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO $TAG $LATEST_TAG $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE
# TODO: Follow 2-step build process for all frameworks once necessary changes are made to the sglang and TRT-LLM backend Dockerfiles.
# vLLM is built in two steps: first the shared "dev" target of
# ${SOURCE_DIR}/Dockerfile, tagged dynamo-base:${VERSION}; then the framework
# Dockerfile, which receives that tag through the DYNAMO_BASE_IMAGE build arg.
# Every other framework keeps the single docker build in the else branch.
# NOTE(review): the comparison is against upper-case "VLLM"; presumably
# FRAMEWORK is normalised to upper case earlier in the script - confirm.
if [[ $FRAMEWORK == "VLLM" ]]; then
# Define base image tag before using it
DYNAMO_BASE_IMAGE="dynamo-base:${VERSION}"
# Start base image build
echo "======================================"
echo "Starting Build 1: Base Image"
echo "======================================"
# The option variables are intentionally unquoted so that each one expands
# into zero or more separate docker arguments.
$RUN_PREFIX docker build -f "${SOURCE_DIR}/Dockerfile" --target dev $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO --tag $DYNAMO_BASE_IMAGE $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE
# Start framework build
echo "======================================"
echo "Starting Build 2: Framework Image"
echo "======================================"
# Tell the framework Dockerfile which base image was produced by Build 1.
BUILD_ARGS+=" --build-arg DYNAMO_BASE_IMAGE=${DYNAMO_BASE_IMAGE}"
$RUN_PREFIX docker build -f $DOCKERFILE $TARGET_STR $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO $TAG $LATEST_TAG $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE
else
$RUN_PREFIX docker build -f $DOCKERFILE $TARGET_STR $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO $TAG $LATEST_TAG $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE
fi
{ set +x; } 2>/dev/null
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
set -euo pipefail
# sccache management script
# This script handles sccache installation, environment setup, and statistics display
SCCACHE_VERSION="v0.8.2"
# Print the command-line help text to stdout.
# "$0" is expanded so the examples show the name the script was invoked with.
usage() {
    printf '%s\n' \
        "Usage: $0 [COMMAND] [OPTIONS]" \
        'Commands:' \
        'install Install sccache binary (requires ARCH_ALT environment variable)' \
        'show-stats Display sccache statistics with optional build name' \
        'help Show this help message' \
        'Environment variables:' \
        "USE_SCCACHE Set to 'true' to enable sccache" \
        'SCCACHE_BUCKET S3 bucket name (fallback if not passed as parameter)' \
        'SCCACHE_REGION S3 region (fallback if not passed as parameter)' \
        'ARCH Architecture for S3 key prefix (fallback if not passed as parameter)' \
        'ARCH_ALT Alternative architecture name for downloads (e.g., x86_64, aarch64)' \
        'Examples:' \
        '# Install sccache (requires ARCH_ALT to be set)' \
        "ARCH_ALT=x86_64 $0 install" \
        '# Show stats with build name' \
        "$0 show-stats \"UCX\""
}
# Download the pinned sccache release and install the binary into /usr/local/bin.
#
# Reads:
#   ARCH_ALT         - architecture part of the release archive name
#                      (e.g. x86_64, aarch64); required.
#   SCCACHE_VERSION  - release tag to download (set at the top of this script).
# Exits with status 1 when ARCH_ALT is unset or empty.
#
# The download and extraction happen in a private temporary directory, so
# cleanup never touches files in the caller's working directory.
install_sccache() {
    if [ -z "${ARCH_ALT:-}" ]; then
        echo "Error: ARCH_ALT environment variable is required for sccache installation" >&2
        exit 1
    fi
    echo "Installing sccache ${SCCACHE_VERSION} for architecture ${ARCH_ALT}..."

    local archive_name="sccache-${SCCACHE_VERSION}-${ARCH_ALT}-unknown-linux-musl"
    local download_url="https://github.com/mozilla/sccache/releases/download/${SCCACHE_VERSION}/${archive_name}.tar.gz"
    local work_dir
    work_dir="$(mktemp -d)"

    # Download and install sccache
    wget --tries=3 --waitretry=5 -O "${work_dir}/${archive_name}.tar.gz" "${download_url}"
    tar -xzf "${work_dir}/${archive_name}.tar.gz" -C "${work_dir}"
    mv "${work_dir}/${archive_name}/sccache" /usr/local/bin/

    # Cleanup: only the temporary directory created above is removed.
    rm -rf "${work_dir}"
    echo "sccache installed successfully"
}
# Print sccache statistics, optionally labelled with the build step that just ran.
#
# Arguments:
#   $1 - optional build name shown in the header (e.g. "UCX").
# Prints "sccache is not available" when no sccache command can be found.
show_stats() {
    # The script runs under `set -u`; default to empty so that calling
    # show-stats without a build name does not abort with "unbound variable".
    local build_name="${1:-}"
    if command -v sccache >/dev/null 2>&1; then
        if [ -n "${build_name}" ]; then
            echo "=== sccache statistics AFTER ${build_name} ==="
        else
            echo "=== sccache statistics ==="
        fi
        sccache --show-stats
    else
        echo "sccache is not available"
    fi
}
# Entry point: dispatch the first command-line argument to a sub-command.
#
# Arguments:
#   $1   - command name; defaults to "help" when omitted.
#   $2.. - passed through to the selected sub-command.
# Exits with status 1 for an unknown command (after printing the usage text)
# and for "generate-env" when its implementation is not present.
main() {
    local command="${1:-help}"
    case "${command}" in
        install)
            install_sccache
            ;;
        generate-env)
            # generate_env_file is not defined in this script. Guard the call so
            # the user gets a clear error instead of "command not found".
            if ! declare -F generate_env_file >/dev/null; then
                echo "Error: the 'generate-env' command is not available in this script" >&2
                exit 1
            fi
            shift  # Remove the command from arguments
            generate_env_file "$@"  # Pass all remaining arguments
            ;;
        show-stats)
            shift  # Remove the command from arguments
            show_stats "$@"  # Pass all remaining arguments
            ;;
        help|--help|-h)
            usage
            ;;
        *)
            echo "Unknown command: $1"
            usage
            exit 1
            ;;
    esac
}
main "$@"
......@@ -263,7 +263,6 @@ def vllm_config_test(request):
@pytest.mark.e2e
@pytest.mark.slow
def test_serve_deployment(vllm_config_test, request, runtime_services):
"""
Test dynamo serve deployments with different graph configurations.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment