Unverified commit f0cea269, authored by Tushar Sharma and committed by GitHub
Browse files

build: OPS-597: restructure sglang to follow container strategy structure (#2803)


Signed-off-by: Tushar Sharma <tusharma@nvidia.com>
parent 96064614
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base" ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
# FIXME: NCCL will hang with 25.03, so use 25.01 for now # TODO OPS-612: NCCL will hang with 25.03, so use 25.01 for now
# Please check https://github.com/ai-dynamo/dynamo/pull/1065 # Please check https://github.com/ai-dynamo/dynamo/pull/1065
# for details and reproducer to manually test if the image # for details and reproducer to manually test if the image
# can be updated to later versions. # can be updated to later versions.
...@@ -11,6 +11,10 @@ ARG RELEASE_BUILD ...@@ -11,6 +11,10 @@ ARG RELEASE_BUILD
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda" ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04" ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
# Make sure to update the dependency version in pyproject.toml when updating this
ARG SGLANG_VERSION="0.5.0rc2"
# Define general architecture ARGs for supporting both x86 and aarch64 builds. # Define general architecture ARGs for supporting both x86 and aarch64 builds.
# ARCH: Used for package suffixes (e.g., amd64, arm64) # ARCH: Used for package suffixes (e.g., amd64, arm64)
# ARCH_ALT: Used for Rust targets, manylinux suffix (e.g., x86_64, aarch64) # ARCH_ALT: Used for Rust targets, manylinux suffix (e.g., x86_64, aarch64)
...@@ -25,150 +29,70 @@ ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04" ...@@ -25,150 +29,70 @@ ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
# without adding if statements everywhere, so just define both as ARGs for now. # without adding if statements everywhere, so just define both as ARGs for now.
ARG ARCH=amd64 ARG ARCH=amd64
ARG ARCH_ALT=x86_64 ARG ARCH_ALT=x86_64
# Python configuration
# Make sure to update the dependency version in pyproject.toml when updating this
ARG SGLANG_VERSION="0.5.0rc2"
##################################
########## Base Image ############
##################################
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base
# Redeclare ARCH and ARCH_ALT so they're available in this stage
ARG ARCH
ARG ARCH_ALT
ARG NIXL_UCX_REF=v1.19.0
ARG NIXL_REF=0.4.1
ENV NIXL_SRC_DIR=/opt/nixl
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
ENV LD_LIBRARY_PATH=$NIXL_LIB_DIR:$NIXL_PLUGIN_DIR:$LD_LIBRARY_PATH
USER root
ARG PYTHON_VERSION=3.12 ARG PYTHON_VERSION=3.12
RUN apt-get update -y && \ ARG DYNAMO_BASE_IMAGE="dynamo:latest-none"
apt-get install -y \ FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base
# NIXL build dependencies
cmake \
meson \
ninja-build \
pybind11-dev \
# These headers are missing with the hpcx installer, required
# by UCX to find RDMA devices
libibverbs-dev rdma-core ibverbs-utils libibumad-dev \
libnuma-dev librdmacm-dev ibverbs-providers \
# Rust build dependencies
clang \
libclang-dev \
git \
# Install utilities
nvtop \
tmux \
vim \
autoconf \
libtool \
net-tools
WORKDIR /workspace
### UCX EFA Setup ###
RUN rm -rf /opt/hpcx/ucx && \
rm -rf /usr/local/ucx && \
echo "Building UCX with reference $NIXL_UCX_REF" && \
cd /usr/local/src && \
git clone https://github.com/openucx/ucx.git && \
cd ucx && \
git checkout $NIXL_UCX_REF && \
./autogen.sh && ./configure \
--prefix=/usr/local/ucx \
--enable-shared \
--disable-static \
--disable-doxygen-doc \
--enable-optimizations \
--enable-cma \
--enable-devel-headers \
--with-cuda=/usr/local/cuda \
--with-verbs \
--with-efa \
--with-dm \
--with-gdrcopy=/usr/local \
--enable-mt && \
make -j && \
make -j install-strip && \
ldconfig
ENV LD_LIBRARY_PATH=\
/usr/lib:/usr/local/ucx/lib:\
/usr/local/ucx/lib/ucx:\
$LD_LIBRARY_PATH
ENV CPATH=/usr/include:$CPATH
ENV PATH=/usr/bin:$PATH
ENV PKG_CONFIG_PATH=/usr/lib/pkgconfig:$PKG_CONFIG_PATH
SHELL ["/bin/bash", "-c"]
WORKDIR /workspace
### NIXL SETUP ### ########################################################
# Clone nixl source ########## Framework Development Image ################
# TEMP: disable gds backend for arm64 ########################################################
RUN git clone "https://github.com/ai-dynamo/nixl.git" ${NIXL_SRC_DIR} && \ #
cd ${NIXL_SRC_DIR} && \ # PURPOSE: Framework development and SGLang compilation
git checkout ${NIXL_REF} && \ #
if [ "$ARCH" = "arm64" ]; then \ # This stage builds and compiles framework dependencies including:
nixl_build_args="-Ddisable_gds_backend=true"; \ # - SGLang inference engine with CUDA support
else \ # - All necessary build tools and compilation dependencies
nixl_build_args=""; \ # - Framework-level Python packages and extensions
fi && \ #
mkdir build && \ # Use this stage when you need to:
meson setup build/ --buildtype=release --prefix=$NIXL_PREFIX $nixl_build_args && \ # - Build SGLang from source with custom modifications
cd build/ && \ # - Develop or debug framework-level components
ninja && \ # - Create custom builds with specific optimization flags
ninja install; #
### NATS & ETCD SETUP ###
# nats
RUN wget --tries=3 --waitretry=5 https://github.com/nats-io/nats-server/releases/download/v2.10.28/nats-server-v2.10.28-${ARCH}.deb && \
dpkg -i nats-server-v2.10.28-${ARCH}.deb && rm nats-server-v2.10.28-${ARCH}.deb
# etcd
ENV ETCD_VERSION="v3.5.21"
RUN wget --tries=3 --waitretry=5 https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-${ARCH}.tar.gz -O /tmp/etcd.tar.gz && \
mkdir -p /usr/local/bin/etcd && \
tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1 && \
rm /tmp/etcd.tar.gz
ENV PATH=/usr/local/bin/etcd/:$PATH
# Use dynamo base image (see /container/Dockerfile for more details)
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS framework
ARG PYTHON_VERSION
RUN apt-get update -y \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
# Python runtime - CRITICAL for virtual environment to work
python${PYTHON_VERSION}-dev \
build-essential \
# SGLang build dependencies
cmake \
ibverbs-providers \
ibverbs-utils \
libibumad-dev \
libibverbs-dev \
libnuma-dev \
librdmacm-dev \
rdma-core \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
### VIRTUAL ENVIRONMENT SETUP ### ### VIRTUAL ENVIRONMENT SETUP ###
# Install uv and create virtualenv
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
RUN mkdir /opt/dynamo && \ ARG PYTHON_VERSION
uv venv /opt/dynamo/venv --python 3.12 # Create virtual environment
RUN mkdir -p /opt/dynamo/venv && \
uv venv /opt/dynamo/venv --python $PYTHON_VERSION
# Activate virtual environment # Activate virtual environment
ENV VIRTUAL_ENV=/opt/dynamo/venv ENV VIRTUAL_ENV=/opt/dynamo/venv \
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}" PATH="/opt/dynamo/venv/bin:${PATH}"
# Install NIXL Python module ARG ARCH
# TODO: Move gds_path selection based on arch into NIXL build # Redeclare ARCH and ARCH_ALT so they're available in this stage
# TEMP: disable gds backend for arm64 ARG ARCH_ALT
RUN if [ "$ARCH" = "arm64" ]; then \
cd ${NIXL_SRC_DIR} && uv build . --out-dir /workspace/wheels/nixl \
--config-settings=setup-args="-Ddisable_gds_backend=true"; \ WORKDIR /workspace
else \ # Install SGLang and related dependencies
cd ${NIXL_SRC_DIR} && uv build . --out-dir /workspace/wheels/nixl; \
fi && \
# Install the wheel
# TODO: Move NIXL wheel install to the wheel_builder stage
uv pip install /workspace/wheels/nixl/*.whl
# Install sglang
#TODO: Built wheel should become an artifact which can be cached and reused in subsequent builds
ARG SGLANG_VERSION ARG SGLANG_VERSION
RUN --mount=type=cache,target=/root/.cache/uv \ RUN --mount=type=cache,target=/root/.cache/uv \
cd /opt && \ cd /opt && \
...@@ -181,55 +105,56 @@ RUN --mount=type=cache,target=/root/.cache/uv \ ...@@ -181,55 +105,56 @@ RUN --mount=type=cache,target=/root/.cache/uv \
# Set env var that allows for forceful shutdown of inflight requests in SGL's TokenizerManager # Set env var that allows for forceful shutdown of inflight requests in SGL's TokenizerManager
ENV SGL_FORCE_SHUTDOWN=1 ENV SGL_FORCE_SHUTDOWN=1
# Common dependencies ##################################################
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \ ########## Runtime Image ########################
uv pip install --requirement /tmp/requirements.txt ##################################################
#
# Install test dependencies # PURPOSE: Production runtime environment
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \ #
uv pip install --requirement /tmp/requirements.txt # This stage creates a lightweight production-ready image containing:
# - Pre-compiled SGLang and framework dependencies
# ### MISC UTILITY SETUP ### # - Dynamo runtime libraries and Python packages
# - Essential runtime dependencies and configurations
# Finish pyright install # - Optimized for inference workloads and deployment
RUN pyright --help > /dev/null 2>&1 #
# Use this stage when you need:
# Enable Git operations in the /workspace directory # - Production deployment of Dynamo with SGLang
RUN printf "[safe]\n directory=/workspace\n" > /root/.gitconfig # - Minimal runtime footprint without build tools
# - Ready-to-run inference server environment
# - Base for custom application containers
#
FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime
### BUILDS ### WORKDIR /workspace
ENV DYNAMO_HOME=/opt/dynamo
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
# Rust build/dev dependencies ARG ARCH_ALT
RUN apt update -y && \ ARG PYTHON_VERSION
apt install --no-install-recommends -y \ ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
build-essential \ ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
protobuf-compiler \ ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
cmake \
libssl-dev \
pkg-config
ENV RUSTUP_HOME=/usr/local/rustup \ # Install Python, build-essential and python3-dev as apt dependencies
CARGO_HOME=/usr/local/cargo \ RUN apt-get update && \
PATH=/usr/local/cargo/bin:$PATH \ DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
RUST_VERSION=1.89.0 # Python runtime - CRITICAL for virtual environment to work
python${PYTHON_VERSION}-dev \
# Define Rust target based on ARCH_ALT ARG build-essential \
ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu # jq and curl for polling various endpoints and health checks
jq \
# Install Rust using RUSTARCH derived from ARCH_ALT curl \
RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \ # Libraries required by UCX to find RDMA devices
# TODO: Add SHA check back based on RUSTARCH libibverbs1 rdma-core ibverbs-utils libibumad3 \
chmod +x rustup-init && \ libnuma1 librdmacm1 ibverbs-providers \
./rustup-init -y --no-modify-path --profile minimal --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \ # JIT Kernel Compilation, flashinfer
rm rustup-init && \ ninja-build \
chmod -R a+w $RUSTUP_HOME $CARGO_HOME g++ \
# prometheus dependencies
ARG CARGO_BUILD_JOBS ca-certificates && \
# Set CARGO_BUILD_JOBS to 16 if not provided rm -rf /var/lib/apt/lists/*
# This is to prevent cargo from building $(nproc) jobs in parallel,
# which might exceed the number of opened files limit.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
# Install prometheus # Install prometheus
ARG PROM_VERSION=3.4.1 ARG PROM_VERSION=3.4.1
...@@ -239,210 +164,37 @@ RUN ARCH=$(dpkg --print-architecture) && \ ...@@ -239,210 +164,37 @@ RUN ARCH=$(dpkg --print-architecture) && \
arm64) PLATFORM=linux-arm64 ;; \ arm64) PLATFORM=linux-arm64 ;; \
*) echo "Unsupported architecture: $ARCH" && exit 1 ;; \ *) echo "Unsupported architecture: $ARCH" && exit 1 ;; \
esac && \ esac && \
curl -fsSL https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}/prometheus-${PROM_VERSION}.${PLATFORM}.tar.gz \ curl -fsSL "https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}/prometheus-${PROM_VERSION}.${PLATFORM}.tar.gz" \
| tar -xz -C /tmp && \ | tar -xz -C /tmp && \
mv /tmp/prometheus-${PROM_VERSION}.${PLATFORM}/prometheus /usr/local/bin/ && \ mv "/tmp/prometheus-${PROM_VERSION}.${PLATFORM}/prometheus" /usr/local/bin/ && \
chmod +x /usr/local/bin/prometheus && \ chmod +x /usr/local/bin/prometheus && \
rm -rf /tmp/prometheus-${PROM_VERSION}.${PLATFORM} rm -rf "/tmp/prometheus-${PROM_VERSION}.${PLATFORM}"
#######################################
########## Local Development ##########
#######################################
FROM base AS local-dev
# https://code.visualstudio.com/remote/advancedcontainers/add-nonroot-user
# Will use the default ubuntu user, but give sudo access
# Needed so files permissions aren't set to root ownership when writing from inside container
# Don't want ubuntu to be editable, just change uid and gid. User ubuntu is hardcoded in .devcontainer
ENV USERNAME=ubuntu
ARG USER_UID=1000
ARG USER_GID=1000
RUN apt-get update && apt-get install -y sudo gnupg2 gnupg1 \
&& echo "$USERNAME ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/$USERNAME \
&& chmod 0440 /etc/sudoers.d/$USERNAME \
&& mkdir -p /home/$USERNAME \
&& chown -R $USERNAME:$USERNAME /home/$USERNAME \
&& rm -rf /var/lib/apt/lists/* \
&& chsh -s /bin/bash $USERNAME
# This is a slow operation (~40s on my cpu)
# Much better than chown -R $USERNAME:$USERNAME /opt/dynamo/venv (~10min on my cpu)
COPY --from=base --chown=$USER_UID:$USER_GID /opt/dynamo/venv/ /opt/dynamo/venv/
RUN chown $USERNAME:$USERNAME /opt/dynamo/venv
COPY --from=base --chown=$USERNAME:$USERNAME /usr/local/bin /usr/local/bin
USER $USERNAME
ENV HOME=/home/$USERNAME
ENV PYTHONPATH=/workspace/dynamo/components/planner/src:/workspace/examples/sglang:$PYTHONPATH
WORKDIR $HOME
# https://code.visualstudio.com/remote/advancedcontainers/persist-bash-history
RUN SNIPPET="export PROMPT_COMMAND='history -a' && export HISTFILE=$HOME/.commandhistory/.bash_history" \
&& mkdir -p $HOME/.commandhistory \
&& touch $HOME/.commandhistory/.bash_history \
&& echo "$SNIPPET" >> "$HOME/.bashrc"
RUN mkdir -p /home/$USERNAME/.cache/
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
##################################
##### Wheel Build Image ##########
##################################
# Redeclare ARCH_ALT ARG so it's available for interpolation in the FROM instruction
ARG ARCH_ALT
FROM quay.io/pypa/manylinux_2_28_${ARCH_ALT} AS wheel_builder
ARG CARGO_BUILD_JOBS
# Set CARGO_BUILD_JOBS to 16 if not provided
# This is to prevent cargo from building $(nproc) jobs in parallel,
# which might exceed the number of opened files limit.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
# Use build arg RELEASE_BUILD = true to generate wheels for Python 3.10, 3.11 and 3.12.
ARG RELEASE_BUILD
# Keep in sync with the base image.
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
WORKDIR /workspace
RUN yum update -y \
&& yum install -y llvm-toolset \
&& yum install -y python3.12-devel \
&& yum install -y protobuf-compiler \
&& yum clean all \
&& rm -rf /var/cache/yum
ENV RUSTUP_HOME=/usr/local/rustup \
CARGO_HOME=/usr/local/cargo \
CARGO_TARGET_DIR=/workspace/target \
VIRTUAL_ENV=/opt/dynamo/venv
COPY --from=base $RUSTUP_HOME $RUSTUP_HOME
COPY --from=base $CARGO_HOME $CARGO_HOME
COPY --from=base $NIXL_PREFIX $NIXL_PREFIX
COPY --from=base /workspace /workspace
COPY --from=base $VIRTUAL_ENV $VIRTUAL_ENV
ENV PATH=$CARGO_HOME/bin:$VIRTUAL_ENV/bin:$PATH
# Copy configuration files
COPY pyproject.toml /workspace/
COPY README.md /workspace/
COPY LICENSE /workspace/
COPY Cargo.toml /workspace/
COPY Cargo.lock /workspace/
COPY rust-toolchain.toml /workspace/
COPY hatch_build.py /workspace/
# Copy source code
COPY lib/ /workspace/lib/
COPY components /workspace/components
COPY launch /workspace/launch
RUN cargo build \
--release \
--locked \
--features dynamo-llm/block-manager \
--workspace
# Build dynamo wheel
RUN uv build --wheel --out-dir /workspace/dist && \
cd /workspace/lib/bindings/python && \
uv pip install maturin[patchelf] && \
maturin build --release --features block-manager --out /workspace/dist && \
if [ "$RELEASE_BUILD" = "true" ]; then \
uv run --python 3.11 maturin build --release --features block-manager --out /workspace/dist && \
uv run --python 3.10 maturin build --release --features block-manager --out /workspace/dist; \
fi
#######################################
########## CI Minimum Image ###########
#######################################
FROM base AS ci_minimum
ENV DYNAMO_HOME=/workspace
ENV CARGO_TARGET_DIR=/workspace/target
WORKDIR /workspace
COPY --from=wheel_builder /workspace /workspace
COPY --from=wheel_builder $NIXL_PREFIX $NIXL_PREFIX
# Copy Cargo cache to avoid re-downloading dependencies
COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME
# Copy rest of the code
COPY . /workspace
# Package the bindings
RUN mkdir -p /opt/dynamo/bindings/wheels && \
mkdir /opt/dynamo/bindings/lib && \
cp dist/ai_dynamo*cp312*.whl /opt/dynamo/bindings/wheels/. && \
cp target/release/metrics /usr/local/bin
RUN uv pip install /workspace/dist/ai_dynamo_runtime*cp312*.whl && \
uv pip install /workspace/dist/ai_dynamo*any.whl
# Copy launch banner
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
echo "cat ~/.launch_screen" >> ~/.bashrc
ENV PYTHONPATH=/workspace/dynamo/components/planner/src:/workspace/examples/sglang/utils:$PYTHONPATH # Copy CUDA development tools (nvcc, headers, dependencies, etc.) from framework devel image
COPY --from=framework /usr/local/cuda/bin/nvcc /usr/local/cuda/bin/nvcc
######################################## COPY --from=framework /usr/local/cuda/bin/cudafe++ /usr/local/cuda/bin/cudafe++
########## Development Image ########### COPY --from=framework /usr/local/cuda/bin/ptxas /usr/local/cuda/bin/ptxas
######################################## COPY --from=framework /usr/local/cuda/bin/fatbinary /usr/local/cuda/bin/fatbinary
FROM ci_minimum AS dev COPY --from=framework /usr/local/cuda/include/ /usr/local/cuda/include/
COPY --from=framework /usr/local/cuda/nvvm /usr/local/cuda/nvvm
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"] COPY --from=framework /usr/local/cuda/lib64/libcudart.so* /usr/local/cuda/lib64/
CMD []
####################################
########## Runtime Image ###########
####################################
FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime
WORKDIR /workspace
ENV DYNAMO_HOME=/workspace
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
### COPY NATS & ETCD ### ### COPY NATS & ETCD ###
# Copy nats and etcd from base image # Copy nats and etcd from dynamo_base image
COPY --from=base /usr/bin/nats-server /usr/bin/nats-server COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
COPY --from=base /usr/local/bin/etcd/ /usr/local/bin/etcd/ COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
# Add ETCD and CUDA binaries to PATH so cicc and other CUDA tools are accessible # Add ETCD and CUDA binaries to PATH so cicc and other CUDA tools are accessible
ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH
# Copy prometheus from base image # Copy UCX from framework image as plugin for NIXL
COPY --from=base /usr/local/bin/prometheus /usr/local/bin/prometheus # Copy NIXL source from framework image
# Copy dynamo wheels for gitlab artifacts
# Copy UCX from base image as plugin for NIXL COPY --from=dynamo_base /usr/local/ucx /usr/local/ucx
# Copy NIXL source from wheel_builder image COPY --from=dynamo_base $NIXL_PREFIX $NIXL_PREFIX
ARG ARCH_ALT
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
COPY --from=base /usr/local/ucx /usr/local/ucx
COPY --from=wheel_builder $NIXL_PREFIX $NIXL_PREFIX
# Copy CUDA development tools (nvcc, headers, dependencies, etc.) from base devel image # Copies sglang repo (editable install)
COPY --from=base /usr/local/cuda/bin/nvcc /usr/local/cuda/bin/nvcc COPY --from=framework /opt/sglang /opt/sglang
COPY --from=base /usr/local/cuda/bin/cudafe++ /usr/local/cuda/bin/cudafe++
COPY --from=base /usr/local/cuda/bin/ptxas /usr/local/cuda/bin/ptxas
COPY --from=base /usr/local/cuda/bin/fatbinary /usr/local/cuda/bin/fatbinary
COPY --from=base /usr/local/cuda/include/ /usr/local/cuda/include/
COPY --from=base /usr/local/cuda/nvvm /usr/local/cuda/nvvm
COPY --from=base /usr/local/cuda/lib64/libcudart.so* /usr/local/cuda/lib64/
ENV LD_LIBRARY_PATH=\ ENV LD_LIBRARY_PATH=\
$NIXL_LIB_DIR:\ $NIXL_LIB_DIR:\
...@@ -451,46 +203,26 @@ $NIXL_PLUGIN_DIR:\ ...@@ -451,46 +203,26 @@ $NIXL_PLUGIN_DIR:\
/usr/local/ucx/lib/ucx:\ /usr/local/ucx/lib/ucx:\
$LD_LIBRARY_PATH $LD_LIBRARY_PATH
# Setup the python environment ### VIRTUAL ENVIRONMENT SETUP ###
# libnuma-dev is a required dependency for sglang integration with NIXL
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ # Copy uv and entire virtual environment from framework container
RUN apt-get update && \ COPY --from=framework /bin/uv /bin/uvx /bin/
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ COPY --from=framework ${VIRTUAL_ENV} ${VIRTUAL_ENV}
build-essential python3-dev libnuma-dev \
# jq and curl for polling various endpoints and health checks # Install dynamo, NIXL, and dynamo-specific dependencies
curl \ COPY benchmarks/ /opt/dynamo/benchmarks/
jq \ COPY --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
# For debugging
vim \
# Libraries required by UCX to find RDMA devices
libibverbs1 rdma-core ibverbs-utils libibumad3 \
libnuma1 librdmacm1 ibverbs-providers && \
rm -rf /var/lib/apt/lists/* && \
uv venv $VIRTUAL_ENV --python 3.12 && \
echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc
# Install the wheels and symlink executables to /usr/local/bin so dynamo components can use them
# Copy metrics binary from wheel_builder image, not part of ai-dynamo wheel
# Dynamo components currently do not have the VIRTUAL_ENV in their PATH, so we need to symlink the executables
COPY --from=ci_minimum /workspace/target/release/metrics /usr/local/bin/metrics
COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/
COPY --from=base /workspace/wheels/nixl/*.whl wheelhouse/
ARG SGLANG_VERSION
RUN uv pip install \ RUN uv pip install \
/workspace/wheelhouse/ai_dynamo_runtime*cp312*.whl \ /opt/dynamo/wheelhouse/ai_dynamo_runtime*cp312*.whl \
/workspace/wheelhouse/ai_dynamo*any.whl \ /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
/workspace/wheelhouse/nixl*.whl \ /opt/dynamo/wheelhouse/nixl/nixl*.whl \
"sglang[all]==${SGLANG_VERSION}" /opt/dynamo/benchmarks && \
rm -rf /opt/dynamo/benchmarks
# Common dependencies # Install common and test dependencies
# TODO: Remove extra install and use pyproject.toml to define all dependencies
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \ RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
uv pip install --requirement /tmp/requirements.txt --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
uv pip install --requirement /tmp/requirements.txt --requirement /tmp/requirements.test.txt
# Install test dependencies
# TODO: Remove this once we have a functional CI image built on top of the runtime image
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
uv pip install --requirement /tmp/requirements.txt
# Copy launch banner # Copy launch banner
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \ RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
...@@ -507,6 +239,10 @@ COPY benchmarks /workspace/benchmarks ...@@ -507,6 +239,10 @@ COPY benchmarks /workspace/benchmarks
COPY examples /workspace/examples COPY examples /workspace/examples
RUN uv pip install /workspace/benchmarks RUN uv pip install /workspace/benchmarks
# Copy benchmarks, backends and tests for CI
COPY tests /workspace/tests
COPY benchmarks /workspace/benchmarks
COPY components/backends/sglang /workspace/components/backends/sglang
# Copy attribution files # Copy attribution files
COPY ATTRIBUTION* LICENSE /workspace/ COPY ATTRIBUTION* LICENSE /workspace/
...@@ -514,3 +250,76 @@ ENV PYTHONPATH=/workspace/examples/sglang/utils:$PYTHONPATH ...@@ -514,3 +250,76 @@ ENV PYTHONPATH=/workspace/examples/sglang/utils:$PYTHONPATH
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"] ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD [] CMD []
###########################################################
########## Development (run.sh, runs as root user) ########
###########################################################
#
# PURPOSE: Local development environment for use with run.sh (not Dev Container plug-in)
#
# This stage runs as root and provides:
# - Development tools and utilities for local debugging
# - Support for vscode/cursor development outside the Dev Container plug-in
#
# Use this stage if you need a full-featured development environment with extra tools,
# but do not use it with the Dev Container plug-in.
FROM runtime AS dev
# Don't want ubuntu to be editable, just change uid and gid.
ARG WORKSPACE_DIR=/workspace
# Install utilities as root
RUN apt-get update -y && \
apt-get install -y --no-install-recommends \
# Install utilities
nvtop \
wget \
tmux \
vim \
git \
openssh-client \
iproute2 \
rsync \
zip \
unzip \
htop \
# Build Dependencies
autoconf \
automake \
cmake \
libtool \
meson \
net-tools \
pybind11-dev \
# Rust build dependencies
clang \
libclang-dev \
protobuf-compiler && \
rm -rf /var/lib/apt/lists/*
COPY --from=runtime /usr/local/bin /usr/local/bin
# Set workspace directory variable
ENV WORKSPACE_DIR=${WORKSPACE_DIR} \
DYNAMO_HOME=${WORKSPACE_DIR} \
RUSTUP_HOME=/usr/local/rustup \
CARGO_HOME=/usr/local/cargo \
CARGO_TARGET_DIR=/workspace/target \
VIRTUAL_ENV=/opt/dynamo/venv \
PATH=/usr/local/cargo/bin:$PATH
COPY --from=dynamo_base /usr/local/rustup /usr/local/rustup
COPY --from=dynamo_base /usr/local/cargo /usr/local/cargo
COPY --from=runtime ${VIRTUAL_ENV} ${VIRTUAL_ENV}
# so we can use maturin develop
RUN uv pip install maturin[patchelf]
# Make sure to sync this with the one specified on README.md.
# This is a generic PYTHONPATH which works for all the frameworks, so some paths may not be relevant for this particular framework.
ENV PYTHONPATH=${WORKSPACE_DIR}/components/metrics/src:${WORKSPACE_DIR}/components/frontend/src:${WORKSPACE_DIR}/components/planner/src:${WORKSPACE_DIR}/components/backends/mocker/src:${WORKSPACE_DIR}/components/backends/trtllm/src:${WORKSPACE_DIR}/components/backends/vllm/src:${WORKSPACE_DIR}/components/backends/sglang/src:${WORKSPACE_DIR}/components/backends/llama_cpp/src
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
\ No newline at end of file
...@@ -618,7 +618,7 @@ if [ -z "$RUN_PREFIX" ]; then ...@@ -618,7 +618,7 @@ if [ -z "$RUN_PREFIX" ]; then
fi fi
# TODO: Follow 2-step build process for all frameworks once necessary changes are made to the sglang and TRT-LLM backend Dockerfiles. # TODO: Follow 2-step build process for all frameworks once necessary changes are made to the sglang and TRT-LLM backend Dockerfiles.
if [[ $FRAMEWORK == "VLLM" ]]; then if [[ $FRAMEWORK == "VLLM" ]] || [[ $FRAMEWORK == "SGLANG" ]]; then
# Define base image tag before using it # Define base image tag before using it
DYNAMO_BASE_IMAGE="dynamo-base:${VERSION}" DYNAMO_BASE_IMAGE="dynamo-base:${VERSION}"
# Start base image build # Start base image build
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment