Unverified commit 82bae247, authored by Anant Sharma and committed by GitHub
Browse files

ci: add support for vllm sanity testing on Github (#2526)


Signed-off-by: Anant Sharma <anants@nvidia.com>
Co-authored-by: Tushar Sharma <tusharma@nvidia.com>
parent e3619ce0
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
name: NVIDIA Github Validation

# Runs on pushes to main and to branches named like "pull-request/<number>".
on:
  push:
    branches:
      - main
      - "pull-request/[0-9]+"

jobs:
  build-test:
    runs-on: gpu-l40-runners
    strategy:
      matrix:
        framework: [vllm]
        # Per-framework settings: the image build target and the pytest
        # marker expression selecting which tests run.
        include:
          - framework: vllm
            target: runtime
            pytest_marks: "e2e and vllm and gpu_1 and not slow"
    # Do not cancel main branch runs
    concurrency:
      group: ${{ matrix.framework }}-build-test-${{ github.ref_name || github.run_id }}
      cancel-in-progress: ${{ github.ref_name != 'main' }}
    name: Build and Test - ${{ matrix.framework }}
    env:
      CONTAINER_ID: test_${{ github.run_id }}_${{ github.run_attempt }}_${{ github.job }}_${{ matrix.framework }}
      PYTEST_XML_FILE: pytest_test_report.xml
      FRAMEWORK: ${{ matrix.framework }}
      TARGET: ${{ matrix.target }}
      PYTEST_MARKS: ${{ matrix.pytest_marks }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to NGC
        if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name == 'push'
        # Hand the token to the shell through the environment instead of
        # expanding it into the script text, so its contents can never be
        # interpreted as shell syntax.
        env:
          NGC_CI_ACCESS_TOKEN: ${{ secrets.NGC_CI_ACCESS_TOKEN }}
        run: |
          echo "$NGC_CI_ACCESS_TOKEN" | docker login nvcr.io -u '$oauthtoken' --password-stdin
      # Runs before the build: removes all unused Docker images, containers,
      # networks and build cache on the runner.
      - name: Cleanup
        if: always()
        run: |
          docker system prune -af
      # Prints GPU driver state. `lsmod | grep nvidia` has no `|| true`, so
      # this step fails when no nvidia kernel module is loaded.
      - name: Debug
        run: |
          lsmod | grep nvidia
          sudo dmesg | grep -i nvrm || true
          nvidia-smi
      - name: Build image
        env:
          GITHUB_TOKEN: ${{ secrets.CI_TOKEN }}
          AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
          SCCACHE_S3_BUCKET: ${{ secrets.SCCACHE_S3_BUCKET }}
        run: |
          ./container/build.sh --tag ${{ matrix.framework }}:latest \
            --target ${{ matrix.target }} \
            --framework ${{ matrix.framework }} \
            --use-sccache \
            --sccache-bucket "$SCCACHE_S3_BUCKET" \
            --sccache-region "$AWS_DEFAULT_REGION"
      # NOTE(review): the container is started with --rm and no later step
      # copies the JUnit XML out, so the report appears to be discarded with
      # the container - confirm whether it should be collected.
      - name: Run pytest
        run: |
          docker run --rm --gpus all -w /workspace \
            --name ${{ env.CONTAINER_ID }}_pytest \
            ${{ matrix.framework }}:latest \
            bash -c "pytest -xsv --basetemp=/tmp --junitxml=${{ env.PYTEST_XML_FILE }} -m \"${{ env.PYTEST_MARKS }}\""
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: NVIDIA Test Github Validation name: NVIDIA Github Validation
on: on:
push: push:
...@@ -21,6 +9,11 @@ on: ...@@ -21,6 +9,11 @@ on:
- main - main
pull_request: pull_request:
# Do not cancel main branch runs
concurrency:
group: dynamo-build-test-${{ github.ref_name || github.run_id }}
cancel-in-progress: ${{ github.ref_name != 'main' }}
jobs: jobs:
build-test: build-test:
runs-on: runs-on:
...@@ -53,7 +46,14 @@ jobs: ...@@ -53,7 +46,14 @@ jobs:
docker compose up -d nats-server etcd-server docker compose up -d nats-server etcd-server
- name: Run Rust checks (block-manager + integration tests) - name: Run Rust checks (block-manager + integration tests)
run: | run: |
docker run -v ${{ github.workspace }}:/workspace -w /workspace/lib/llm --name ${{ env.CONTAINER_ID }}_rust_checks ${{ steps.define_image_tag.outputs.image_tag }} bash -ec 'rustup component add rustfmt clippy && cargo fmt -- --check && cargo clippy --features block-manager --no-deps --all-targets -- -D warnings && cargo test --locked --all-targets --features=block-manager && cargo test --locked --features integration -- --nocapture' docker run --rm -v ${{ github.workspace }}:/workspace -w /workspace/lib/llm \
--name ${{ env.CONTAINER_ID }}_rust_checks \
${{ steps.define_image_tag.outputs.image_tag }} \
bash -ec 'rustup component add rustfmt clippy && \
cargo fmt -- --check && \
cargo clippy --features block-manager --no-deps --all-targets -- -D warnings && \
cargo test --locked --all-targets --features=block-manager && \
cargo test --locked --features integration -- --nocapture'
- name: Cleanup services - name: Cleanup services
if: always() if: always()
working-directory: ./deploy working-directory: ./deploy
...@@ -63,7 +63,10 @@ jobs: ...@@ -63,7 +63,10 @@ jobs:
env: env:
PYTEST_MARKS: "pre_merge or mypy" PYTEST_MARKS: "pre_merge or mypy"
run: | run: |
docker run -v ${{ github.workspace }}:/workspace -w /workspace --name ${{ env.CONTAINER_ID }}_pytest ${{ steps.define_image_tag.outputs.image_tag }} bash -c "pytest --basetemp=/tmp --junitxml=${{ env.PYTEST_XML_FILE }} -m \"${{ env.PYTEST_MARKS }}\"" docker run -v ${{ github.workspace }}:/workspace -w /workspace \
--name ${{ env.CONTAINER_ID }}_pytest \
${{ steps.define_image_tag.outputs.image_tag }} \
bash -c "pytest --basetemp=/tmp --junitxml=${{ env.PYTEST_XML_FILE }} -m \"${{ env.PYTEST_MARKS }}\""
- name: Copy test report from test Container - name: Copy test report from test Container
if: always() if: always()
run: | run: |
......
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
##################################
########## Build Arguments ########
##################################
# Base image configuration
ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base" ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
# TODO OPS-612: NCCL will hang with 25.03, so use 25.01 for now # TODO OPS-612: NCCL will hang with 25.03, so use 25.01 for now
# Please check https://github.com/ai-dynamo/dynamo/pull/1065 # Please check https://github.com/ai-dynamo/dynamo/pull/1065
# for details and reproducer to manually test if the image # for details and reproducer to manually test if the image
# can be updated to later versions. # can be updated to later versions.
ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04" ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
# Build configuration
ARG RELEASE_BUILD=false ARG RELEASE_BUILD=false
ARG ENABLE_KVBM=false ARG ENABLE_KVBM=false
ARG CARGO_BUILD_JOBS
# Define general architecture ARGs for supporting both x86 and aarch64 builds. # Define general architecture ARGs for supporting both x86 and aarch64 builds.
# ARCH: Used for package suffixes (e.g., amd64, arm64) # ARCH: Used for package suffixes (e.g., amd64, arm64)
...@@ -23,6 +31,17 @@ ARG ENABLE_KVBM=false ...@@ -23,6 +31,17 @@ ARG ENABLE_KVBM=false
ARG ARCH=amd64 ARG ARCH=amd64
ARG ARCH_ALT=x86_64 ARG ARCH_ALT=x86_64
# SCCACHE configuration
ARG USE_SCCACHE
ARG SCCACHE_BUCKET=""
ARG SCCACHE_REGION=""
# NIXL configuration
ARG NIXL_UCX_REF=v1.19.0
ARG NIXL_REF=0.4.1
# Python configuration
ARG PYTHON_VERSION=3.12
################################## ##################################
########## Base Image ############ ########## Base Image ############
...@@ -30,44 +49,66 @@ ARG ARCH_ALT=x86_64 ...@@ -30,44 +49,66 @@ ARG ARCH_ALT=x86_64
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base
# Redeclare ARCH and ARCH_ALT so they're available in this stage # Redeclare ARGs for this stage
ARG ARCH ARG ARCH
ARG ARCH_ALT ARG ARCH_ALT
ARG CARGO_BUILD_JOBS ARG PYTHON_VERSION
ARG USE_SCCACHE
ARG NIXL_UCX_REF=v1.19.0 ARG SCCACHE_BUCKET
ARG NIXL_REF=0.4.1 ARG SCCACHE_REGION
ARG NIXL_UCX_REF
# Environment variables for NIXL ARG NIXL_REF
ENV NIXL_SRC_DIR=/opt/nixl \
NIXL_PREFIX=/opt/nvidia/nvda_nixl \
NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu \
NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu/plugins
USER root USER root
ARG PYTHON_VERSION=3.12 WORKDIR /opt/dynamo
##################################
########## Tool Installation #####
##################################
# Install uv package manager
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
# Install SCCACHE if requested
COPY container/use-sccache.sh /tmp/use-sccache.sh
RUN if [ "$USE_SCCACHE" = "true" ]; then \
/tmp/use-sccache.sh install; \
fi
# Set SCCACHE environment variables
ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \
SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}} \
SCCACHE_S3_KEY_PREFIX=${USE_SCCACHE:+${ARCH}} \
RUSTC_WRAPPER=${USE_SCCACHE:+sccache} \
CMAKE_C_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache} \
CMAKE_CXX_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache} \
CMAKE_CUDA_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache}
##################################
########## Rust Setup ############
##################################
# Rust environment setup # Rust environment setup
ENV RUSTUP_HOME=/usr/local/rustup \ ENV RUSTUP_HOME=/usr/local/rustup \
CARGO_HOME=/usr/local/cargo \ CARGO_HOME=/usr/local/cargo \
PATH=/usr/local/cargo/bin:$PATH \ PATH=/usr/local/cargo/bin:$PATH \
RUST_VERSION=1.89.0 RUST_VERSION=1.89.0
WORKDIR /opt/dynamo
# Define Rust target based on ARCH_ALT ARG # Define Rust target based on ARCH_ALT ARG
ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu
# Install Rust using RUSTARCH derived from ARCH_ALT # Install Rust
RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \ RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \
# TODO OPS-591: Add SHA check back based on RUSTARCH
chmod +x rustup-init && \ chmod +x rustup-init && \
./rustup-init -y --no-modify-path --profile minimal --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \ ./rustup-init -y --no-modify-path --profile minimal --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \
rm rustup-init && \ rm rustup-init && \
chmod -R a+w $RUSTUP_HOME $CARGO_HOME chmod -R a+w $RUSTUP_HOME $CARGO_HOME
##################################
########## System Dependencies ###
##################################
# Install system packages
RUN apt-get update -y \ RUN apt-get update -y \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
# NIXL build dependencies # NIXL build dependencies
...@@ -96,12 +137,17 @@ RUN apt-get update -y \ ...@@ -96,12 +137,17 @@ RUN apt-get update -y \
&& apt-get clean \ && apt-get clean \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
# Download external dependencies in parallel for better performance ##################################
########## External Services #####
##################################
# Install NATS server
ENV NATS_VERSION="v2.10.28" ENV NATS_VERSION="v2.10.28"
RUN --mount=type=cache,target=/var/cache/apt \ RUN --mount=type=cache,target=/var/cache/apt \
wget --tries=3 --waitretry=5 https://github.com/nats-io/nats-server/releases/download/${NATS_VERSION}/nats-server-${NATS_VERSION}-${ARCH}.deb && \ wget --tries=3 --waitretry=5 https://github.com/nats-io/nats-server/releases/download/${NATS_VERSION}/nats-server-${NATS_VERSION}-${ARCH}.deb && \
dpkg -i nats-server-${NATS_VERSION}-${ARCH}.deb && rm nats-server-${NATS_VERSION}-${ARCH}.deb dpkg -i nats-server-${NATS_VERSION}-${ARCH}.deb && rm nats-server-${NATS_VERSION}-${ARCH}.deb
# Install etcd
ENV ETCD_VERSION="v3.5.21" ENV ETCD_VERSION="v3.5.21"
RUN wget --tries=3 --waitretry=5 https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-${ARCH}.tar.gz -O /tmp/etcd.tar.gz && \ RUN wget --tries=3 --waitretry=5 https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-${ARCH}.tar.gz -O /tmp/etcd.tar.gz && \
mkdir -p /usr/local/bin/etcd && \ mkdir -p /usr/local/bin/etcd && \
...@@ -109,13 +155,21 @@ RUN wget --tries=3 --waitretry=5 https://github.com/etcd-io/etcd/releases/downlo ...@@ -109,13 +155,21 @@ RUN wget --tries=3 --waitretry=5 https://github.com/etcd-io/etcd/releases/downlo
rm /tmp/etcd.tar.gz rm /tmp/etcd.tar.gz
ENV PATH=/usr/local/bin/etcd/:$PATH ENV PATH=/usr/local/bin/etcd/:$PATH
### UCX EFA Setup ### ##################################
########## UCX Build #############
##################################
# Build and install UCX
RUN rm -rf /opt/hpcx/ucx && \ RUN rm -rf /opt/hpcx/ucx && \
rm -rf /usr/local/ucx && \ rm -rf /usr/local/ucx && \
echo "Building UCX with reference $NIXL_UCX_REF" && \ echo "Building UCX with reference $NIXL_UCX_REF" && \
cd /usr/local/src && \ cd /usr/local/src && \
git clone https://github.com/openucx/ucx.git && \ git clone https://github.com/openucx/ucx.git && \
cd ucx && git checkout $NIXL_UCX_REF && \ cd ucx && git checkout $NIXL_UCX_REF && \
CC=${USE_SCCACHE:+sccache gcc} && \
CXX=${USE_SCCACHE:+sccache g++} && \
export CC=${CC} && \
export CXX=${CXX} && \
./autogen.sh && \ ./autogen.sh && \
./configure \ ./configure \
--prefix=/usr/local/ucx \ --prefix=/usr/local/ucx \
...@@ -133,6 +187,7 @@ RUN rm -rf /opt/hpcx/ucx && \ ...@@ -133,6 +187,7 @@ RUN rm -rf /opt/hpcx/ucx && \
--enable-mt && \ --enable-mt && \
make -j$(nproc) && \ make -j$(nproc) && \
make -j$(nproc) install-strip && \ make -j$(nproc) install-strip && \
/tmp/use-sccache.sh show-stats "UCX" && \
echo "/usr/local/ucx/lib" > /etc/ld.so.conf.d/ucx.conf && \ echo "/usr/local/ucx/lib" > /etc/ld.so.conf.d/ucx.conf && \
echo "/usr/local/ucx/lib/ucx" >> /etc/ld.so.conf.d/ucx.conf && \ echo "/usr/local/ucx/lib/ucx" >> /etc/ld.so.conf.d/ucx.conf && \
ldconfig && \ ldconfig && \
...@@ -144,8 +199,17 @@ ENV CPATH=/usr/include:$CPATH \ ...@@ -144,8 +199,17 @@ ENV CPATH=/usr/include:$CPATH \
PATH=/usr/bin:$PATH \ PATH=/usr/bin:$PATH \
PKG_CONFIG_PATH=/usr/lib/pkgconfig:$PKG_CONFIG_PATH PKG_CONFIG_PATH=/usr/lib/pkgconfig:$PKG_CONFIG_PATH
### NIXL SETUP ### ##################################
# Clone nixl source with shallow clone for faster download ########## NIXL Setup ############
##################################
# NIXL environment setup
ENV NIXL_SRC_DIR=/opt/nixl \
NIXL_PREFIX=/opt/nvidia/nvda_nixl \
NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu \
NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu/plugins
# Build and install NIXL
RUN git clone --depth 1 --branch ${NIXL_REF} "https://github.com/ai-dynamo/nixl.git" ${NIXL_SRC_DIR} && \ RUN git clone --depth 1 --branch ${NIXL_REF} "https://github.com/ai-dynamo/nixl.git" ${NIXL_SRC_DIR} && \
cd ${NIXL_SRC_DIR} && \ cd ${NIXL_SRC_DIR} && \
if [ "$ARCH" = "arm64" ]; then \ if [ "$ARCH" = "arm64" ]; then \
...@@ -154,13 +218,13 @@ RUN git clone --depth 1 --branch ${NIXL_REF} "https://github.com/ai-dynamo/nixl. ...@@ -154,13 +218,13 @@ RUN git clone --depth 1 --branch ${NIXL_REF} "https://github.com/ai-dynamo/nixl.
nixl_build_args=""; \ nixl_build_args=""; \
fi && \ fi && \
meson setup build/ --buildtype=release --prefix=$NIXL_PREFIX $nixl_build_args && \ meson setup build/ --buildtype=release --prefix=$NIXL_PREFIX $nixl_build_args && \
ninja -C build/ -j$(nproc) && \ ninja -C build/ -j$(nproc) && ninja -C build/ install && \
ninja -C build/ install && \ /tmp/use-sccache.sh show-stats "NIXL" && \
echo "$NIXL_LIB_DIR" > /etc/ld.so.conf.d/nixl.conf && \ echo "$NIXL_LIB_DIR" > /etc/ld.so.conf.d/nixl.conf && \
echo "$NIXL_PLUGIN_DIR" >> /etc/ld.so.conf.d/nixl.conf && \ echo "$NIXL_PLUGIN_DIR" >> /etc/ld.so.conf.d/nixl.conf && \
ldconfig ldconfig
# Install NIXL Python module # Build NIXL Python module
# TODO OPS-590: Move gds_path selection based on arch into NIXL build and re-enable gds backend for arm64 # TODO OPS-590: Move gds_path selection based on arch into NIXL build and re-enable gds backend for arm64
RUN if [ "$ARCH" = "arm64" ]; then \ RUN if [ "$ARCH" = "arm64" ]; then \
cd ${NIXL_SRC_DIR} && uv build . --out-dir /opt/dynamo/wheelhouse/nixl \ cd ${NIXL_SRC_DIR} && uv build . --out-dir /opt/dynamo/wheelhouse/nixl \
...@@ -169,11 +233,15 @@ RUN if [ "$ARCH" = "arm64" ]; then \ ...@@ -169,11 +233,15 @@ RUN if [ "$ARCH" = "arm64" ]; then \
cd ${NIXL_SRC_DIR} && uv build . --out-dir /opt/dynamo/wheelhouse/nixl; \ cd ${NIXL_SRC_DIR} && uv build . --out-dir /opt/dynamo/wheelhouse/nixl; \
fi fi
# Create virtual environment ##################################
########## Python Environment ####
##################################
# Create and activate virtual environment
ARG PYTHON_VERSION
RUN mkdir -p /opt/dynamo/venv && \ RUN mkdir -p /opt/dynamo/venv && \
uv venv /opt/dynamo/venv --python 3.12 uv venv /opt/dynamo/venv --python $PYTHON_VERSION
# Activate virtual environment
ENV VIRTUAL_ENV=/opt/dynamo/venv \ ENV VIRTUAL_ENV=/opt/dynamo/venv \
PATH="/opt/dynamo/venv/bin:${PATH}" PATH="/opt/dynamo/venv/bin:${PATH}"
...@@ -191,43 +259,58 @@ ARG ARCH_ALT ...@@ -191,43 +259,58 @@ ARG ARCH_ALT
FROM quay.io/pypa/manylinux_2_28_${ARCH_ALT} AS wheel_builder FROM quay.io/pypa/manylinux_2_28_${ARCH_ALT} AS wheel_builder
# Redeclare ARGs for this stage
ARG ARCH
ARG ARCH_ALT
ARG CARGO_BUILD_JOBS ARG CARGO_BUILD_JOBS
# Set CARGO_BUILD_JOBS to 16 if not provided
# This is to prevent cargo from building $(nproc) jobs in parallel,
# which might exceed the number of opened files limit.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
# Use build arg RELEASE_BUILD = true to generate wheels for Python 3.10, 3.11 and 3.12.
ARG RELEASE_BUILD ARG RELEASE_BUILD
# Use arg ENABLE_KVBM = true to turn on the block-manager feature
ARG ENABLE_KVBM ARG ENABLE_KVBM
ARG USE_SCCACHE
ARG SCCACHE_BUCKET
ARG SCCACHE_REGION
WORKDIR /opt/dynamo WORKDIR /opt/dynamo
RUN dnf update -y \ # Set environment variables
&& dnf install -y llvm-toolset protobuf-compiler python3.12-devel \ ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16} \
&& dnf clean all \ RUSTUP_HOME=/usr/local/rustup \
&& rm -rf /var/cache/dnf
ENV RUSTUP_HOME=/usr/local/rustup \
CARGO_HOME=/usr/local/cargo \ CARGO_HOME=/usr/local/cargo \
CARGO_TARGET_DIR=/opt/dynamo/target \ CARGO_TARGET_DIR=/opt/dynamo/target \
VIRTUAL_ENV=/opt/dynamo/venv \ VIRTUAL_ENV=/opt/dynamo/venv \
NIXL_PREFIX=/opt/nvidia/nvda_nixl NIXL_PREFIX=/opt/nvidia/nvda_nixl \
PATH=/usr/local/cargo/bin:/opt/dynamo/venv/bin:$PATH
# Install system dependencies
ARG PYTHON_VERSION
RUN dnf update -y \
&& dnf install -y llvm-toolset protobuf-compiler python${PYTHON_VERSION}-devel wget \
&& dnf clean all \
&& rm -rf /var/cache/dnf
# Copy artifacts from base stage
COPY --from=base $RUSTUP_HOME $RUSTUP_HOME COPY --from=base $RUSTUP_HOME $RUSTUP_HOME
COPY --from=base $CARGO_HOME $CARGO_HOME COPY --from=base $CARGO_HOME $CARGO_HOME
COPY --from=base $NIXL_PREFIX $NIXL_PREFIX COPY --from=base $NIXL_PREFIX $NIXL_PREFIX
COPY --from=base $VIRTUAL_ENV $VIRTUAL_ENV COPY --from=base $VIRTUAL_ENV $VIRTUAL_ENV
ENV PATH=$CARGO_HOME/bin:$VIRTUAL_ENV/bin:$PATH
# Copy configuration files first for better layer caching # Install SCCACHE if requested
COPY pyproject.toml README.md LICENSE Cargo.toml Cargo.lock rust-toolchain.toml hatch_build.py /opt/dynamo/ COPY container/use-sccache.sh /tmp/use-sccache.sh
RUN if [ "$USE_SCCACHE" = "true" ]; then \
/tmp/use-sccache.sh install; \
fi
# Set SCCACHE environment variables
ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \
SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}} \
SCCACHE_S3_KEY_PREFIX=${USE_SCCACHE:+${ARCH}} \
RUSTC_WRAPPER=${USE_SCCACHE:+sccache}
# Copy source code # Copy source code (order matters for layer caching)
COPY pyproject.toml README.md LICENSE Cargo.toml Cargo.lock rust-toolchain.toml hatch_build.py /opt/dynamo/
COPY lib/ /opt/dynamo/lib/ COPY lib/ /opt/dynamo/lib/
COPY components/ /opt/dynamo/components/ COPY components/ /opt/dynamo/components/
# Build dynamo wheel # Build wheels
RUN uv build --wheel --out-dir /opt/dynamo/dist && \ RUN uv build --wheel --out-dir /opt/dynamo/dist && \
cd /opt/dynamo/lib/bindings/python && \ cd /opt/dynamo/lib/bindings/python && \
uv pip install maturin[patchelf] && \ uv pip install maturin[patchelf] && \
...@@ -237,14 +320,15 @@ RUN uv build --wheel --out-dir /opt/dynamo/dist && \ ...@@ -237,14 +320,15 @@ RUN uv build --wheel --out-dir /opt/dynamo/dist && \
maturin build --release --out /opt/dynamo/dist; \ maturin build --release --out /opt/dynamo/dist; \
fi && \ fi && \
if [ "$RELEASE_BUILD" = "true" ]; then \ if [ "$RELEASE_BUILD" = "true" ]; then \
# do not enable KVBM feature, ensure compatibility with lower glibc
uv run --python 3.11 maturin build --release --out /opt/dynamo/dist && \ uv run --python 3.11 maturin build --release --out /opt/dynamo/dist && \
uv run --python 3.10 maturin build --release --out /opt/dynamo/dist; \ uv run --python 3.10 maturin build --release --out /opt/dynamo/dist; \
fi fi && \
/tmp/use-sccache.sh show-stats "Dynamo"
############################################## ##############################################
########## Dev entrypoint image ############## ########## Dev entrypoint image ##############
############################################## ##############################################
FROM base AS dev FROM base AS dev
# Application environment variables # Application environment variables
...@@ -254,16 +338,13 @@ ENV DYNAMO_HOME=/opt/dynamo \ ...@@ -254,16 +338,13 @@ ENV DYNAMO_HOME=/opt/dynamo \
WORKDIR /opt/dynamo WORKDIR /opt/dynamo
# Copy built artifacts
COPY --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/ COPY --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
COPY --from=wheel_builder $CARGO_TARGET_DIR $CARGO_TARGET_DIR COPY --from=wheel_builder $CARGO_TARGET_DIR $CARGO_TARGET_DIR
# Copy Cargo cache to avoid re-downloading dependencies
COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME
# Temporarily copy benchmarks folder for installation # Install Python packages
COPY benchmarks/ /opt/dynamo/benchmarks/ COPY benchmarks/ /opt/dynamo/benchmarks/
# Install all python packages
RUN uv pip install \ RUN uv pip install \
/opt/dynamo/wheelhouse/ai_dynamo_runtime*cp312*.whl \ /opt/dynamo/wheelhouse/ai_dynamo_runtime*cp312*.whl \
/opt/dynamo/wheelhouse/ai_dynamo*any.whl \ /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
...@@ -271,10 +352,10 @@ RUN uv pip install \ ...@@ -271,10 +352,10 @@ RUN uv pip install \
/opt/dynamo/benchmarks && \ /opt/dynamo/benchmarks && \
rm -rf /opt/dynamo/benchmarks rm -rf /opt/dynamo/benchmarks
# Copy launch banner # Setup launch banner
RUN --mount=type=bind,source=./container/launch_message.txt,target=/opt/dynamo/launch_message.txt \ RUN --mount=type=bind,source=./container/launch_message.txt,target=/opt/dynamo/launch_message.txt \
sed '/^#\s/d' /opt/dynamo/launch_message.txt > ~/.launch_screen && \ sed '/^#\s/d' /opt/dynamo/launch_message.txt > ~/.launch_screen && \
echo "cat ~/.launch_screen" >> ~/.bashrc echo "cat ~/.launch_screen" >> ~/.bashrc
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"] ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD [] CMD []
\ No newline at end of file
This diff is collapsed.
...@@ -121,6 +121,11 @@ NIXL_UCX_EFA_REF=9d2b88a1f67faf9876f267658bd077b379b8bb76 ...@@ -121,6 +121,11 @@ NIXL_UCX_EFA_REF=9d2b88a1f67faf9876f267658bd077b379b8bb76
NO_CACHE="" NO_CACHE=""
# sccache configuration for S3
USE_SCCACHE=""
SCCACHE_BUCKET=""
SCCACHE_REGION=""
get_options() { get_options() {
while :; do while :; do
case $1 in case $1 in
...@@ -282,9 +287,25 @@ get_options() { ...@@ -282,9 +287,25 @@ get_options() {
--make-efa) --make-efa)
NIXL_UCX_REF=$NIXL_UCX_EFA_REF NIXL_UCX_REF=$NIXL_UCX_EFA_REF
;; ;;
--) --use-sccache)
shift USE_SCCACHE=true
break ;;
--sccache-bucket)
if [ "$2" ]; then
SCCACHE_BUCKET=$2
shift
else
missing_requirement "$1"
fi
;;
--sccache-region)
if [ "$2" ]; then
SCCACHE_REGION=$2
shift
else
missing_requirement "$1"
fi
;; ;;
-?*) -?*)
error 'ERROR: Unknown option: ' "$1" error 'ERROR: Unknown option: ' "$1"
...@@ -345,6 +366,16 @@ get_options() { ...@@ -345,6 +366,16 @@ get_options() {
else else
TARGET_STR="--target dev" TARGET_STR="--target dev"
fi fi
# Validate sccache configuration
if [ "$USE_SCCACHE" = true ]; then
if [ -z "$SCCACHE_BUCKET" ]; then
error "ERROR: --sccache-bucket is required when --use-sccache is specified"
fi
if [ -z "$SCCACHE_REGION" ]; then
error "ERROR: --sccache-region is required when --use-sccache is specified"
fi
fi
} }
...@@ -360,6 +391,15 @@ show_image_options() { ...@@ -360,6 +391,15 @@ show_image_options() {
echo " Build Context: '${BUILD_CONTEXT}'" echo " Build Context: '${BUILD_CONTEXT}'"
echo " Build Arguments: '${BUILD_ARGS}'" echo " Build Arguments: '${BUILD_ARGS}'"
echo " Framework: '${FRAMEWORK}'" echo " Framework: '${FRAMEWORK}'"
if [ "$USE_SCCACHE" = true ]; then
echo " sccache: Enabled"
echo " sccache Bucket: '${SCCACHE_BUCKET}'"
echo " sccache Region: '${SCCACHE_REGION}'"
if [ -n "$SCCACHE_S3_KEY_PREFIX" ]; then
echo " sccache S3 Key Prefix: '${SCCACHE_S3_KEY_PREFIX}'"
fi
fi
echo "" echo ""
} }
...@@ -386,6 +426,9 @@ show_help() { ...@@ -386,6 +426,9 @@ show_help() {
echo " [--make-efa Enables EFA support for NIXL]" echo " [--make-efa Enables EFA support for NIXL]"
echo " [--enable-kvbm Enables KVBM support in Python 3.12]" echo " [--enable-kvbm Enables KVBM support in Python 3.12]"
echo " [--trtllm-use-nixl-kvcache-experimental Enables NIXL KVCACHE experimental support for TensorRT-LLM]" echo " [--trtllm-use-nixl-kvcache-experimental Enables NIXL KVCACHE experimental support for TensorRT-LLM]"
echo " [--use-sccache enable sccache for Rust/C/C++ compilation caching]"
echo " [--sccache-bucket S3 bucket name for sccache (required with --use-sccache)]"
echo " [--sccache-region S3 region for sccache (required with --use-sccache)]"
exit 0 exit 0
} }
...@@ -400,6 +443,7 @@ error() { ...@@ -400,6 +443,7 @@ error() {
get_options "$@" get_options "$@"
# Automatically set ARCH and ARCH_ALT if PLATFORM is linux/arm64 # Automatically set ARCH and ARCH_ALT if PLATFORM is linux/arm64
ARCH="amd64" ARCH="amd64"
if [[ "$PLATFORM" == *"linux/arm64"* ]]; then if [[ "$PLATFORM" == *"linux/arm64"* ]]; then
...@@ -547,6 +591,15 @@ if [ -n "${NIXL_UCX_REF}" ]; then ...@@ -547,6 +591,15 @@ if [ -n "${NIXL_UCX_REF}" ]; then
BUILD_ARGS+=" --build-arg NIXL_UCX_REF=${NIXL_UCX_REF} " BUILD_ARGS+=" --build-arg NIXL_UCX_REF=${NIXL_UCX_REF} "
fi fi
# Add sccache build arguments
if [ "$USE_SCCACHE" = true ]; then
BUILD_ARGS+=" --build-arg USE_SCCACHE=true"
BUILD_ARGS+=" --build-arg SCCACHE_BUCKET=${SCCACHE_BUCKET}"
BUILD_ARGS+=" --build-arg SCCACHE_REGION=${SCCACHE_REGION}"
fi
LATEST_TAG="--tag dynamo:latest-${FRAMEWORK,,}" LATEST_TAG="--tag dynamo:latest-${FRAMEWORK,,}"
if [ -n "${TARGET}" ]; then if [ -n "${TARGET}" ]; then
LATEST_TAG="${LATEST_TAG}-${TARGET}" LATEST_TAG="${LATEST_TAG}-${TARGET}"
...@@ -558,6 +611,24 @@ if [ -z "$RUN_PREFIX" ]; then ...@@ -558,6 +611,24 @@ if [ -z "$RUN_PREFIX" ]; then
set -x set -x
fi fi
$RUN_PREFIX docker build -f $DOCKERFILE $TARGET_STR $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO $TAG $LATEST_TAG $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE # TODO: Follow 2-step build process for all frameworks once necessary changes are made to the sglang and TRT-LLM backend Dockerfiles.
if [[ $FRAMEWORK == "VLLM" ]]; then
# Define base image tag before using it
DYNAMO_BASE_IMAGE="dynamo-base:${VERSION}"
# Start base image build
echo "======================================"
echo "Starting Build 1: Base Image"
echo "======================================"
$RUN_PREFIX docker build -f "${SOURCE_DIR}/Dockerfile" --target dev $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO --tag $DYNAMO_BASE_IMAGE $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE
# Start framework build
echo "======================================"
echo "Starting Build 2: Framework Image"
echo "======================================"
BUILD_ARGS+=" --build-arg DYNAMO_BASE_IMAGE=${DYNAMO_BASE_IMAGE}"
$RUN_PREFIX docker build -f $DOCKERFILE $TARGET_STR $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO $TAG $LATEST_TAG $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE
else
$RUN_PREFIX docker build -f $DOCKERFILE $TARGET_STR $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO $TAG $LATEST_TAG $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE
fi
{ set +x; } 2>/dev/null { set +x; } 2>/dev/null
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# Abort on any failing command, on use of an unset variable, and on a failure
# in any stage of a pipeline.
set -euo pipefail
# sccache management script
# This script handles sccache installation and statistics display
# Release tag of sccache to download; install_sccache uses it to build the
# release URL and archive name.
SCCACHE_VERSION="v0.8.2"
# Print the command-line help text to stdout.
# The script's invocation path ($0) is substituted into the usage line and
# the examples.
usage() {
    local self="$0"
    cat <<USAGE
Usage: ${self} [COMMAND] [OPTIONS]
Commands:
install Install sccache binary (requires ARCH_ALT environment variable)
show-stats Display sccache statistics with optional build name
help Show this help message
Environment variables:
USE_SCCACHE Set to 'true' to enable sccache
SCCACHE_BUCKET S3 bucket name (fallback if not passed as parameter)
SCCACHE_REGION S3 region (fallback if not passed as parameter)
ARCH Architecture for S3 key prefix (fallback if not passed as parameter)
ARCH_ALT Alternative architecture name for downloads (e.g., x86_64, aarch64)
Examples:
# Install sccache (requires ARCH_ALT to be set)
ARCH_ALT=x86_64 ${self} install
# Show stats with build name
${self} show-stats "UCX"
USAGE
}
# Download the sccache release named by SCCACHE_VERSION and install the
# binary into /usr/local/bin.
#
# Requires:
#   ARCH_ALT         architecture component of the release archive name
#                    (the usage text gives x86_64 and aarch64 as examples)
#   SCCACHE_VERSION  release tag, set at the top of this script
#
# Exits with status 1 if ARCH_ALT is unset or empty.
install_sccache() {
    if [ -z "${ARCH_ALT:-}" ]; then
        echo "Error: ARCH_ALT environment variable is required for sccache installation" >&2
        exit 1
    fi

    local release="sccache-${SCCACHE_VERSION}-${ARCH_ALT}-unknown-linux-musl"
    local workdir
    # Work in a private scratch directory so the download never lands in the
    # caller's working directory and cleanup cannot touch unrelated files
    # (previously `rm -rf sccache*` ran as a glob in the current directory).
    workdir="$(mktemp -d)"

    echo "Installing sccache ${SCCACHE_VERSION} for architecture ${ARCH_ALT}..."
    # Download and install sccache
    wget --tries=3 --waitretry=5 \
        -O "${workdir}/${release}.tar.gz" \
        "https://github.com/mozilla/sccache/releases/download/${SCCACHE_VERSION}/${release}.tar.gz"
    tar -xzf "${workdir}/${release}.tar.gz" -C "${workdir}"
    mv "${workdir}/${release}/sccache" /usr/local/bin/
    # Cleanup
    rm -rf "${workdir}"
    echo "sccache installed successfully"
}
# Print sccache statistics, labelled with an optional build name.
#
# Arguments:
#   $1  (optional) name of the build that just finished, e.g. "UCX"
#
# If no sccache executable is found, prints a notice instead of failing.
show_stats() {
    # Default to empty so a missing argument does not trip `set -u`;
    # the usage text documents the build name as optional.
    local build_name="${1:-}"
    if command -v sccache >/dev/null 2>&1; then
        if [ -n "$build_name" ]; then
            echo "=== sccache statistics AFTER ${build_name} ==="
        else
            echo "=== sccache statistics ==="
        fi
        sccache --show-stats
    else
        echo "sccache is not available"
    fi
}
# Entry point: dispatch on the first argument (defaults to "help").
#
# Commands:
#   install               install the sccache binary
#   show-stats [NAME]     print sccache statistics, optionally labelled NAME
#   help | --help | -h    print usage
#
# Any other command prints an error plus usage and exits with status 1.
# The former `generate-env` branch was removed: it called
# generate_env_file, which this script never defines, so it could only fail
# with "command not found".
main() {
    case "${1:-help}" in
        install)
            install_sccache
            ;;
        show-stats)
            shift # Remove the command from arguments
            show_stats "$@" # Pass all remaining arguments
            ;;
        help|--help|-h)
            usage
            ;;
        *)
            echo "Unknown command: $1"
            usage
            exit 1
            ;;
    esac
}
# Run the dispatcher with the script's command-line arguments.
main "$@"
...@@ -263,7 +263,6 @@ def vllm_config_test(request): ...@@ -263,7 +263,6 @@ def vllm_config_test(request):
@pytest.mark.e2e @pytest.mark.e2e
@pytest.mark.slow
def test_serve_deployment(vllm_config_test, request, runtime_services): def test_serve_deployment(vllm_config_test, request, runtime_services):
""" """
Test dynamo serve deployments with different graph configurations. Test dynamo serve deployments with different graph configurations.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment