build: OPS-1140: Refactor sglang dockerfile to support wideep (#3792)

Signed-off-by: Tushar Sharma <tusharma@nvidia.com>
Signed-off-by: Dan Aloni <dan.aloni@vastdata.com>
Signed-off-by: tzulingk@nvidia.com <tzulingk@nvidia.com>
Signed-off-by: Tzu-Ling Kan <tzulingk@nvidia.com>
Signed-off-by: Graham King <grahamk@nvidia.com>
Signed-off-by: Ziqi Fan <ziqif@nvidia.com>
Signed-off-by: Dan Gil <dagil@nvidia.com>
Signed-off-by: Dillon Cullinan <dcullinan@nvidia.com>
Signed-off-by: Harrison Saturley-Hall <hsaturleyhal@nvidia.com>
Signed-off-by: Anant Sharma <anants@nvidia.com>
Co-authored-by: Dan Aloni <dan.aloni@vastdata.com>
Co-authored-by: Ziqi Fan <ziqif@nvidia.com>
Co-authored-by: Elyas Mehtabuddin <emehtabuddin@nvidia.com>
Co-authored-by: Tzu-Ling Kan <tzulingk@nvidia.com>
Co-authored-by: ishandhanani <82981111+ishandhanani@users.noreply.github.com>
Co-authored-by: Graham King <grahamk@nvidia.com>
Co-authored-by: dagil-nvidia <dagil@nvidia.com>
Co-authored-by: Rohan Varma <rohanv@nvidia.com>
Co-authored-by: Dillon Cullinan <dcullinan92@gmail.com>
Co-authored-by: Harrison Saturley-Hall <hsaturleyhal@nvidia.com>
Co-authored-by: Anant Sharma <anants@nvidia.com>

build: OPS-1140: Refactor sglang dockerfile to support wideep (#3792)
Signed-off-by: Tushar Sharma <tusharma@nvidia.com>
Signed-off-by: Dan Aloni <dan.aloni@vastdata.com>
Signed-off-by: tzulingk@nvidia.com <tzulingk@nvidia.com>
Signed-off-by: Tzu-Ling Kan <tzulingk@nvidia.com>
Signed-off-by: Graham King <grahamk@nvidia.com>
Signed-off-by: Ziqi Fan <ziqif@nvidia.com>
Signed-off-by: Dan Gil <dagil@nvidia.com>
Signed-off-by: Dillon Cullinan <dcullinan@nvidia.com>
Signed-off-by: Harrison Saturley-Hall <hsaturleyhal@nvidia.com>
Signed-off-by: Anant Sharma <anants@nvidia.com>
Co-authored-by: Dan Aloni <dan.aloni@vastdata.com>
Co-authored-by: Ziqi Fan <ziqif@nvidia.com>
Co-authored-by: Elyas Mehtabuddin <emehtabuddin@nvidia.com>
Co-authored-by: Tzu-Ling Kan <tzulingk@nvidia.com>
Co-authored-by: ishandhanani <82981111+ishandhanani@users.noreply.github.com>
Co-authored-by: Graham King <grahamk@nvidia.com>
Co-authored-by: dagil-nvidia <dagil@nvidia.com>
Co-authored-by: Rohan Varma <rohanv@nvidia.com>
Co-authored-by: Dillon Cullinan <dcullinan92@gmail.com>
Co-authored-by: Harrison Saturley-Hall <hsaturleyhal@nvidia.com>
Co-authored-by: Anant Sharma <anants@nvidia.com>
7c74764b · Tushar Sharma · GitHub · 6a84ffd3 · 7c74764b · 7c74764b
Unverified Commit 7c74764b authored Oct 27, 2025 by Tushar Sharma Committed by GitHub Oct 27, 2025
Show whitespace changes
Inline Side-by-side

Showing with 370 additions and 188 deletions

container/Dockerfile container/Dockerfile +11 -5

container/Dockerfile.sglang container/Dockerfile.sglang +351 -180

container/build.sh container/build.sh +8 -3

No files found.
--- a/container/Dockerfile
+++ b/container/Dockerfile
@@ -240,10 +240,10 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
 RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    if [ "$ARCH" = "arm64" ]; then \
-        cd ${NIXL_SRC_DIR} && uv build . --out-dir /opt/dynamo/wheelhouse/nixl \
+        cd ${NIXL_SRC_DIR} && uv build . --out-dir /opt/dynamo/wheelhouse/nixl --python $PYTHON_VERSION \
        --config-settings=setup-args="-Ddisable_gds_backend=true"; \
    else \
-        cd ${NIXL_SRC_DIR} && uv build . --out-dir /opt/dynamo/wheelhouse/nixl; \
+        cd ${NIXL_SRC_DIR} && uv build . --out-dir /opt/dynamo/wheelhouse/nixl --python $PYTHON_VERSION; \
    fi
 ##################################
@@ -296,9 +296,8 @@ ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16} \
    PATH=/usr/local/cargo/bin:/opt/dynamo/venv/bin:$PATH
 # Install system dependencies
-ARG PYTHON_VERSION
 RUN dnf update -y \
-    && dnf install -y llvm-toolset protobuf-compiler python${PYTHON_VERSION}-devel wget unzip \
+    && dnf install -y llvm-toolset protobuf-compiler wget unzip \
    && dnf clean all \
    && rm -rf /var/cache/dnf
@@ -324,7 +323,14 @@ ENV PROTOC=/usr/local/bin/protoc
 COPY --from=base $RUSTUP_HOME $RUSTUP_HOME
 COPY --from=base $CARGO_HOME $CARGO_HOME
 COPY --from=base $NIXL_PREFIX $NIXL_PREFIX
-COPY --from=base $VIRTUAL_ENV $VIRTUAL_ENV
+ARG PYTHON_VERSION
+RUN mkdir -p /opt/dynamo/venv && \
+    uv venv /opt/dynamo/venv --python $PYTHON_VERSION
+ENV VIRTUAL_ENV=/opt/dynamo/venv \
+    PATH="/opt/dynamo/venv/bin:${PATH}"
 # Install SCCACHE if requested
 COPY container/use-sccache.sh /tmp/use-sccache.sh

--- a/container/Dockerfile.sglang
+++ b/container/Dockerfile.sglang
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# syntax=docker/dockerfile:1.10.0
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
-# Note: This Dockerfile will be deprecated in favor of Dockerfile.sglang-wideep soon. Please build the container with that Dockerfile instead.
+ARG CUDA_VERSION=12.9.1
-ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
+# Runtime image and build-time configuration (aligned with other backends)
-# TODO OPS-612: NCCL will hang with 25.03, so use 25.01 for now
+# TODO: OPS-<number>: Use the same runtime image as the other backends
-# Please check https://github.com/ai-dynamo/dynamo/pull/1065
-# for details and reproducer to manually test if the image
-# can be updated to later versions.
-ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
 ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
-ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
+ARG RUNTIME_IMAGE_TAG="12.9.1-cudnn-runtime-ubuntu24.04"
-# Make sure to update the dependency version in pyproject.toml when updating this
+ARG PYTHON_VERSION=3.10
-ARG SGLANG_VERSION="0.5.3.post2"
-# Define general architecture ARGs for supporting both x86 and aarch64 builds.
-#   ARCH: Used for package suffixes (e.g., amd64, arm64)
-#   ARCH_ALT: Used for Rust targets, manylinux suffix (e.g., x86_64, aarch64)
-#
-# Default values are for x86/amd64:
-#   --build-arg ARCH=amd64 --build-arg ARCH_ALT=x86_64
-#
-# For arm64/aarch64, build with:
-#   --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64
-#
-# NOTE: There isn't an easy way to define one of these values based on the other value
-# without adding if statements everywhere, so just define both as ARGs for now.
 ARG ARCH=amd64
 ARG ARCH_ALT=x86_64
-# Python configuration
+ARG CARGO_BUILD_JOBS
-ARG PYTHON_VERSION=3.12
+# sccache configuration - inherit from base build
+ARG USE_SCCACHE
+ARG SCCACHE_BUCKET=""
+ARG SCCACHE_REGION=""
 ARG DYNAMO_BASE_IMAGE="dynamo:latest-none"
 FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base
@@ -40,10 +26,11 @@ FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base
 ########## Framework Development Image ################
 ########################################################
 #
-# PURPOSE: Framework development and SGLang compilation
+# PURPOSE: Framework development and SGLang/DeepEP/NVSHMEM compilation
 #
 # This stage builds and compiles framework dependencies including:
 # - SGLang inference engine with CUDA support
+# - DeepEP and NVSHMEM
 # - All necessary build tools and compilation dependencies
 # - Framework-level Python packages and extensions
 #
@@ -53,60 +40,267 @@ FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base
 # - Create custom builds with specific optimization flags
 #
-# Use dynamo base image (see /container/Dockerfile for more details)
+FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu24.04 AS framework
-FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS framework
+# Declare all ARGs
+ARG BUILD_TYPE=all
+ARG DEEPEP_COMMIT=9af0e0d0e74f3577af1979c9b9e1ac2cad0104ee
+ARG DEEPEP_GB_COMMIT=1b14ad661c7640137fcfe93cccb2694ede1220b0
+ARG CMAKE_BUILD_PARALLEL_LEVEL=2
+ARG FLASHMLA_COMMIT=1408756a88e52a25196b759eaf8db89d2b51b5a1
+ARG SGL_KERNEL_VERSION=0.3.15
+ARG SGLANG_COMMIT=0.5.3.post2
+ARG GDRCOPY_COMMIT=v2.4.4
+ARG NVSHMEM_VERSION=3.3.9
+ARG GRACE_BLACKWELL=false
+ARG ARCH
+ARG ARCH_ALT
 ARG PYTHON_VERSION
+ARG USE_SCCACHE
-RUN apt-get update -y \
+ARG SCCACHE_BUCKET
+ARG SCCACHE_REGION
+ARG CARGO_BUILD_JOBS
+ARG CUDA_VERSION
+# Set all environment variables
+ENV DEBIAN_FRONTEND=noninteractive \
+    TZ=America/Los_Angeles \
+    CUDA_HOME=/usr/local/cuda \
+    GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/ \
+    NVSHMEM_DIR=/sgl-workspace/nvshmem/install \
+    PATH="${PATH}:/usr/local/nvidia/bin" \
+    LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/nvidia/lib:/usr/local/nvidia/lib64" \
+    LANG=en_US.UTF-8 \
+    LANGUAGE=en_US:en \
+    LC_ALL=en_US.UTF-8
+# Combined: Python setup, locale, and all package installation
+RUN apt-get update \
+    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends software-properties-common \
+    && add-apt-repository ppa:deadsnakes/ppa -y \
+    && apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-        # Python runtime - CRITICAL for virtual environment to work
+        # Python (using other python versions as needed)
        python${PYTHON_VERSION}-dev \
+        python${PYTHON_VERSION}-venv \
+        python${PYTHON_VERSION}-distutils \
+        python3-pip \
+        # Build essentials
        build-essential \
+        cmake \
+        ninja-build \
+        ccache \
+        patchelf \
        git \
        git-lfs \
-        # SGLang build dependencies
+        # Core system utilities
-        cmake \
+        tzdata \
-        ibverbs-providers \
+        locales \
-        ibverbs-utils \
+        ca-certificates \
-        libibumad-dev \
+        dkms \
-        libibverbs-dev \
+        kmod \
+        # Command line tools
+        wget \
+        curl \
+        jq \
+        unzip \
+        # Network utilities
+        netcat-openbsd \
+        # SSL and pkg-config
+        libssl-dev \
+        pkg-config \
+        # MPI and NUMA
+        libopenmpi-dev \
+        libnuma1 \
        libnuma-dev \
-        librdmacm-dev \
+        numactl \
-        rdma-core \
+        # InfiniBand/RDMA
-    && apt-get clean \
+        libibverbs-dev \
-    && rm -rf /var/lib/apt/lists/*
+        libibverbs1 \
+        libibumad3 \
-### VIRTUAL ENVIRONMENT SETUP ###
+        librdmacm1 \
+        libnl-3-200 \
-COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
+        libnl-route-3-200 \
-ARG PYTHON_VERSION
+        libnl-route-3-dev \
-# Create virtual environment
+        libnl-3-dev \
-RUN mkdir -p /opt/dynamo/venv && \
+        ibverbs-providers \
-    uv venv /opt/dynamo/venv --python $PYTHON_VERSION
+        infiniband-diags \
+        perftest \
-# Activate virtual environment
+        # Development libraries
-ENV VIRTUAL_ENV=/opt/dynamo/venv \
+        libgoogle-glog-dev \
-    PATH="/opt/dynamo/venv/bin:${PATH}"
+        libgtest-dev \
+        libjsoncpp-dev \
-ARG ARCH
+        libunwind-dev \
-# Redeclare ARCH and ARCH_ALT so they're available in this stage
+        libboost-all-dev \
-ARG ARCH_ALT
+        libgrpc-dev \
+        libgrpc++-dev \
+        libprotobuf-dev \
+        protobuf-compiler \
+        protobuf-compiler-grpc \
+        pybind11-dev \
+        libhiredis-dev \
+        libcurl4-openssl-dev \
+        libczmq4 \
+        libczmq-dev \
+        libfabric-dev \
+        # Package building tools
+        devscripts \
+        debhelper \
+        fakeroot \
+        check \
+        libsubunit0 \
+        libsubunit-dev \
+    # Set Python alternatives
+    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
+    && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \
+    && update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 1 \
+    && update-alternatives --set python /usr/bin/python${PYTHON_VERSION} \
+    # Set up locale
+    && locale-gen en_US.UTF-8 \
+    # Cleanup
+    && rm -rf /var/lib/apt/lists/* \
+    && apt-get clean
+# Install sccache if requested
+COPY container/use-sccache.sh /tmp/use-sccache.sh
+RUN if [ "$USE_SCCACHE" = "true" ]; then \
+    /tmp/use-sccache.sh install; \
+fi
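+# Set environment variables - they'll be empty strings if USE_SCCACHE is unset or empty
+# (note: ${VAR:+word} expands to word for ANY non-empty value, including the literal "false")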
+ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \
+    SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}} \
+    SCCACHE_S3_KEY_PREFIX=${USE_SCCACHE:+${ARCH}} \
+    RUSTC_WRAPPER=${USE_SCCACHE:+sccache} \
+    CMAKE_C_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache} \
+    CMAKE_CXX_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache} \
+    CMAKE_CUDA_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache}
+WORKDIR /sgl-workspace
+# GDRCopy installation
+RUN git clone --depth 1 --branch ${GDRCOPY_COMMIT} https://github.com/NVIDIA/gdrcopy.git \
+    && cd gdrcopy/packages \
+    && export CUDA=${CUDA_HOME} \
+    && ./build-deb-packages.sh \
+    && dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb
+# Fix DeepEP IBGDA symlink
+RUN ln -sf /usr/lib/$(uname -m)-linux-gnu/libmlx5.so.1 /usr/lib/$(uname -m)-linux-gnu/libmlx5.so
+# Install SGLang (requires CUDA 12.8.1 or 12.9.1)
+RUN python3 -m pip install --no-cache-dir --ignore-installed pip==25.3 setuptools==80.9.0 wheel==0.45.1 html5lib==1.1 six==1.17.0 \
+    && git clone --depth 1 --branch v${SGLANG_COMMIT} https://github.com/sgl-project/sglang.git \
+    && cd sglang \
+    && case "$CUDA_VERSION" in \
+        12.8.1) CUINDEX=128 ;; \
+        12.9.1) CUINDEX=129 ;; \
+        *) echo "Error: Unsupported CUDA version for sglang: $CUDA_VERSION (requires 12.8.1 or 12.9.1)" && exit 1 ;; \
+    esac \
+    && python3 -m pip install --no-cache-dir sgl-kernel==${SGL_KERNEL_VERSION} \
+    && python3 -m pip install --no-cache-dir -e "python[${BUILD_TYPE}]" --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} \
+    && python3 -m pip install --no-cache-dir nvidia-nccl-cu12==2.27.6 --force-reinstall --no-deps \
+    && FLASHINFER_LOGGING_LEVEL=warning python3 -m flashinfer --download-cubin
+# Download and extract NVSHMEM source, clone DeepEP (use Tom's fork for GB200)
+RUN --mount=type=cache,target=/var/cache/curl \
+    curl --retry 3 --retry-delay 2 -fsSL -o /var/cache/curl/nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz https://developer.download.nvidia.com/compute/redist/nvshmem/${NVSHMEM_VERSION}/source/nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz \
+    && tar -xf /var/cache/curl/nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz \
+    && mv nvshmem_src nvshmem \
+    && rm -f /var/cache/curl/nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz \
+    && if [ "$GRACE_BLACKWELL" = true ]; then \
+        git clone --depth 1 https://github.com/fzyzcjy/DeepEP.git \
+        && cd DeepEP \
+        && git fetch --depth 1 origin ${DEEPEP_GB_COMMIT} \
+        && git checkout ${DEEPEP_GB_COMMIT}; \
+    else \
+        git clone --depth 1 https://github.com/deepseek-ai/DeepEP.git \
+        && cd DeepEP \
+        && git fetch --depth 1 origin ${DEEPEP_COMMIT} \
+        && git checkout ${DEEPEP_COMMIT}; \
+    fi \
+    && sed -i 's/#define NUM_CPU_TIMEOUT_SECS 100/#define NUM_CPU_TIMEOUT_SECS 1000/' csrc/kernels/configs.cuh
+# Build and install NVSHMEM library only (without python library)
+RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
+    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
+    export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
+    cd /sgl-workspace/nvshmem && \
+    if [ "$GRACE_BLACKWELL" = true ]; then CUDA_ARCH="90;100;120"; else CUDA_ARCH="90"; fi && \
+    NVSHMEM_SHMEM_SUPPORT=0 \
+    NVSHMEM_UCX_SUPPORT=0 \
+    NVSHMEM_USE_NCCL=0 \
+    NVSHMEM_MPI_SUPPORT=0 \
+    NVSHMEM_IBGDA_SUPPORT=1 \
+    NVSHMEM_PMIX_SUPPORT=0 \
+    NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
+    NVSHMEM_USE_GDRCOPY=1 \
+    cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_DIR} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH} -DNVSHMEM_BUILD_PYTHON_LIB=OFF && \
+    cmake --build build --target install -j${CMAKE_BUILD_PARALLEL_LEVEL} && \
+    /tmp/use-sccache.sh show-stats "NVSHMEM"
+# Build nvshmem4py wheels separately (Python 3.10, CUDA 12) to avoid building the python library twice for multiple python versions
+# Need to reconfigure with PYTHON_LIB=ON to add the nvshmem4py subdirectory
+RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
+    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
+    export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
+    cd /sgl-workspace/nvshmem && \
+    if [ "$GRACE_BLACKWELL" = true ]; then CUDA_ARCH="90;100;120"; else CUDA_ARCH="90"; fi && \
+    NVSHMEM_SHMEM_SUPPORT=0 \
+    NVSHMEM_UCX_SUPPORT=0 \
+    NVSHMEM_USE_NCCL=0 \
+    NVSHMEM_MPI_SUPPORT=0 \
+    NVSHMEM_IBGDA_SUPPORT=1 \
+    NVSHMEM_PMIX_SUPPORT=0 \
+    NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
+    NVSHMEM_USE_GDRCOPY=1 \
+    cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_DIR} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH} -DNVSHMEM_BUILD_PYTHON_LIB=ON && \
+    cmake --build build --target build_nvshmem4py_wheel_cu12_${PYTHON_VERSION} -j${CMAKE_BUILD_PARALLEL_LEVEL} && \
+    /tmp/use-sccache.sh show-stats "NVSHMEM4PY"
+# Install DeepEP
+RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
+    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
+    export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
+    cd /sgl-workspace/DeepEP && \
+    NVSHMEM_DIR=${NVSHMEM_DIR} TORCH_CUDA_ARCH_LIST="9.0;10.0" pip install --no-build-isolation .
+# Install flashmla
+RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
+    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
+    export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
+    if [ "${ARCH}" = "amd64" ]; then \
+        git clone https://github.com/deepseek-ai/FlashMLA.git flash-mla \
+        && cd flash-mla \
+        && git checkout ${FLASHMLA_COMMIT} \
+        && git submodule update --init --recursive \
+        && export FLASH_MLA_DISABLE_SM100=1 \
+        && pip install --no-build-isolation -v . ;\
+    fi
+# Copy rust installation from dynamo_base to avoid duplication efforts
+COPY --from=dynamo_base /usr/local/rustup /usr/local/rustup
+COPY --from=dynamo_base /usr/local/cargo /usr/local/cargo
-WORKDIR /workspace
+ENV RUSTUP_HOME=/usr/local/rustup \
-# Install SGLang and related dependencies
+    CARGO_HOME=/usr/local/cargo \
-ARG SGLANG_VERSION
+    CARGO_TARGET_DIR=/workspace/target \
-RUN --mount=type=cache,target=/root/.cache/uv \
+    PATH=/usr/local/cargo/bin:$PATH \
-    cd /opt && \
+    CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
-    git clone https://github.com/sgl-project/sglang.git && \
-    cd sglang && \
+# Install essential Python build tools
-    git checkout v${SGLANG_VERSION} && \
+RUN python3 -m pip install --no-cache-dir \
-    # Install in editable mode for development
+    mooncake-transfer-engine==0.3.6.post1 \
-    uv pip install --prerelease=allow -e "python[all]"
+    scikit-build-core==0.11.6 \
+    setuptools-rust==1.12.0
-# Set env var that allows for forceful shutdown of inflight requests in SGL's TokenizerManager
-ENV SGL_FORCE_SHUTDOWN=1
+# Build and install sgl-router
+RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
+    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
+    export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} \
+    && cd /sgl-workspace/sglang/sgl-router \
+    && cargo build --release \
+    && python3 -m pip install --no-cache-dir .
 ##################################################
 ########## Runtime Image ########################
@@ -114,126 +308,79 @@ ENV SGL_FORCE_SHUTDOWN=1
 #
 # PURPOSE: Production runtime environment
 #
-# This stage creates a lightweight production-ready image containing:
+# This stage creates a production-ready image containing:
-# - Pre-compiled SGLang and framework dependencies
+# - Pre-compiled SGLang, DeepEP, and NVSHMEM components
 # - Dynamo runtime libraries and Python packages
 # - Essential runtime dependencies and configurations
 # - Optimized for inference workloads and deployment
 #
 # Use this stage when you need:
-# - Production deployment of Dynamo with SGLang
+# - Production deployment of Dynamo with SGLang + DeepEP
 # - Minimal runtime footprint without build tools
 # - Ready-to-run inference server environment
-# - Base for custom application containers
 #
+FROM framework AS runtime
-FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime
 WORKDIR /workspace
-ENV DYNAMO_HOME=/opt/dynamo
-ENV VIRTUAL_ENV=/opt/dynamo/venv
-ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
+ARG ARCH
 ARG ARCH_ALT
 ARG PYTHON_VERSION
-ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
-ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
-ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
-# Install Python, build-essential and python3-dev as apt dependencies
+ENV DYNAMO_HOME=/opt/dynamo
-RUN apt-get update && \
+ENV NVSHMEM_DIR=/sgl-workspace/nvshmem/install
-    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
-        # Python runtime - CRITICAL for virtual environment to work
+ENV NIXL_LIB_DIR=${NIXL_PREFIX}/lib/${ARCH_ALT}-linux-gnu
-        python${PYTHON_VERSION}-dev \
+ENV NIXL_PLUGIN_DIR=${NIXL_LIB_DIR}/plugins
-        build-essential \
+ENV LD_LIBRARY_PATH=\
-        # jq and curl for polling various endpoints and health checks
+${NVSHMEM_DIR}/lib:\
-        jq \
+${NIXL_LIB_DIR}:\
-        git \
+${NIXL_PLUGIN_DIR}:\
-        git-lfs \
+/usr/local/ucx/lib:\
-        curl \
+/usr/local/ucx/lib/ucx:\
-        # Libraries required by UCX to find RDMA devices
+/usr/local/nvidia/lib64:\
-        libibverbs1 rdma-core ibverbs-utils libibumad3 \
+${LD_LIBRARY_PATH}
-        libnuma1 librdmacm1 ibverbs-providers \
-        # JIT Kernel Compilation, flashinfer
-        ninja-build \
-        g++ \
-        # prometheus dependencies
-        ca-certificates && \
-    rm -rf /var/lib/apt/lists/*
-# Copy CUDA development tools (nvcc, headers, dependencies, etc.) from framework devel image
+# Copy NATS and ETCD from dynamo_base, and UCX/NIXL
-COPY --from=framework /usr/local/cuda/bin/nvcc /usr/local/cuda/bin/nvcc
-COPY --from=framework /usr/local/cuda/bin/cudafe++ /usr/local/cuda/bin/cudafe++
-COPY --from=framework /usr/local/cuda/bin/ptxas /usr/local/cuda/bin/ptxas
-COPY --from=framework /usr/local/cuda/bin/fatbinary /usr/local/cuda/bin/fatbinary
-COPY --from=framework /usr/local/cuda/include/ /usr/local/cuda/include/
-COPY --from=framework /usr/local/cuda/nvvm /usr/local/cuda/nvvm
-COPY --from=framework /usr/local/cuda/lib64/libcudart.so* /usr/local/cuda/lib64/
-### COPY NATS & ETCD ###
-# Copy nats and etcd from dynamo_base image
 COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
 COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
-# Add ETCD and CUDA binaries to PATH so cicc and other CUDA tools are accessible
-ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH
-# Copy UCX from framework image as plugin for NIXL
-# Copy NIXL source from framework image
-# Copy dynamo wheels for gitlab artifacts
 COPY --from=dynamo_base /usr/local/ucx /usr/local/ucx
 COPY --from=dynamo_base $NIXL_PREFIX $NIXL_PREFIX
+ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH
-# Copies sglang repo (editable install)
+# Install Dynamo wheels from dynamo_base wheelhouse
-COPY --from=framework /opt/sglang /opt/sglang
-ENV LD_LIBRARY_PATH=\
-$NIXL_LIB_DIR:\
-$NIXL_PLUGIN_DIR:\
-/usr/local/ucx/lib:\
-/usr/local/ucx/lib/ucx:\
-$LD_LIBRARY_PATH
-### VIRTUAL ENVIRONMENT SETUP ###
-# Copy uv and entire virtual environment from framework container
-COPY --from=framework /bin/uv /bin/uvx /bin/
-COPY --from=framework ${VIRTUAL_ENV} ${VIRTUAL_ENV}
-# Install dynamo, NIXL, and dynamo-specific dependencies
 COPY benchmarks/ /opt/dynamo/benchmarks/
 COPY --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
-RUN uv pip install \
+RUN pip install \
    /opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
    /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
    /opt/dynamo/wheelhouse/nixl/nixl*.whl \
    && cd /opt/dynamo/benchmarks \
-    && UV_GIT_LFS=1 uv pip install --no-cache . \
+    && pip install --no-cache . \
    && cd - \
    && rm -rf /opt/dynamo/benchmarks
 # Install common and test dependencies
 RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
    --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
-    UV_GIT_LFS=1 uv pip install \
+    pip install \
        --no-cache \
        --requirement /tmp/requirements.txt \
        --requirement /tmp/requirements.test.txt
-# Copy launch banner
+## Copy attribution files and launch banner
-RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
+COPY ATTRIBUTION* LICENSE /workspace/
-    sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
+COPY container/launch_message.txt /workspace/launch_message.txt
+RUN sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
    echo "cat ~/.launch_screen" >> ~/.bashrc
 # Copy tests, benchmarks, deploy and components for CI
 COPY tests /workspace/tests
-COPY benchmarks /workspace/benchmarks
 COPY examples /workspace/examples
+COPY benchmarks /workspace/benchmarks
 COPY deploy /workspace/deploy
 COPY components/ /workspace/components/
-# Copy attribution files
-COPY ATTRIBUTION* LICENSE /workspace/
 ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
 CMD []
@@ -252,56 +399,80 @@ CMD []
 FROM runtime AS dev
-# Don't want ubuntu to be editable, just change uid and gid.
+ARG WORKSPACE_DIR=/sgl-workspace/dynamo
-ARG WORKSPACE_DIR=/workspace
-# Install utilities as root
+# Install development tools and utilities
 RUN apt-get update -y && \
    apt-get install -y --no-install-recommends  \
-    # Install utilities
+    # System monitoring and debugging tools
    nvtop \
+    htop \
+    gdb \
+    # Network and system utilities
    wget \
-    tmux \
-    vim \
-    git \
-    openssh-client \
    iproute2 \
+    net-tools \
+    openssh-client \
    rsync \
+    lsof \
+    # File and archive utilities
    zip \
-    unzip \
+    tree \
-    htop \
+    # Development and build tools
-    # Build Dependencies
+    vim \
+    tmux \
+    git \
+    git-lfs \
    autoconf \
    automake \
    cmake \
    libtool \
    meson \
-    net-tools \
+    bear \
-    pybind11-dev \
+    ccache \
-    # Rust build dependencies
+    less \
+    # Language and development support
    clang \
    libclang-dev \
-    protobuf-compiler && \
+    # Shell and productivity tools
+    zsh \
+    silversearcher-ag \
+    cloc \
+    locales \
+    # NVIDIA tools dependencies
+    gnupg && \
+    echo "deb https://developer.download.nvidia.com/devtools/repos/ubuntu2004/amd64 /" | tee /etc/apt/sources.list.d/nvidia-devtools.list && \
+    apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub && \
+    apt-get update -y && \
+    apt-get install -y nsight-systems-cli && \
    rm -rf /var/lib/apt/lists/*
-# Set workspace directory variable
+# Install clang-format and clangd
-ENV WORKSPACE_DIR=${WORKSPACE_DIR} \
+RUN curl --retry 3 --retry-delay 2 -LSso /usr/local/bin/clang-format https://github.com/muttleyxd/clang-tools-static-binaries/releases/download/master-32d3ac78/clang-format-16_linux-amd64 \
-    DYNAMO_HOME=${WORKSPACE_DIR} \
+    && chmod +x /usr/local/bin/clang-format \
-    RUSTUP_HOME=/usr/local/rustup \
+    && curl --retry 3 --retry-delay 2 -L https://github.com/clangd/clangd/releases/download/18.1.3/clangd-linux-18.1.3.zip -o clangd.zip \
-    CARGO_HOME=/usr/local/cargo \
+    && unzip clangd.zip \
-    CARGO_TARGET_DIR=/workspace/target \
+    && cp -r clangd_18.1.3/bin/* /usr/local/bin/ \
-    VIRTUAL_ENV=/opt/dynamo/venv \
+    && cp -r clangd_18.1.3/lib/* /usr/local/lib/ \
-    PATH=/usr/local/cargo/bin:$PATH
+    && rm -rf clangd_18.1.3 clangd.zip
-COPY --from=dynamo_base /usr/local/rustup /usr/local/rustup
-COPY --from=dynamo_base /usr/local/cargo /usr/local/cargo
-# Install maturin, for maturin develop
-RUN uv pip install maturin[patchelf]
 # Editable install of dynamo
 COPY pyproject.toml README.md hatch_build.py /workspace/
-RUN uv pip install --no-deps -e .
+RUN pip install --no-deps -e .
+# Install Python development packages
+RUN pip install --no-cache-dir \
+    maturin[patchelf] \
+    pytest \
+    black \
+    isort \
+    icdiff \
+    scikit_build_core \
+    uv \
+    pre-commit \
+    pandas \
+    matplotlib \
+    tabulate
 ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
 CMD []
--- a/container/build.sh
+++ b/container/build.sh
@@ -329,7 +329,6 @@ get_options() {
                missing_requirement "$1"
            fi
            ;;
        --vllm-max-jobs)
            # Set MAX_JOBS for vLLM compilation (only used by Dockerfile.vllm)
            if [ "$2" ]; then
@@ -716,7 +715,10 @@ fi
 if [ -n "${MAX_JOBS}" ]; then
    BUILD_ARGS+=" --build-arg MAX_JOBS=${MAX_JOBS} "
 fi
+if [[ $FRAMEWORK == "SGLANG" ]]; then
+    echo "Forcing Python version to 3.10 for sglang image build"
+    BUILD_ARGS+=" --build-arg PYTHON_VERSION=3.10"
+fi
 # Add sccache build arguments
 if [ "$USE_SCCACHE" = true ]; then
    BUILD_ARGS+=" --build-arg USE_SCCACHE=true"
@@ -725,7 +727,10 @@ if [ "$USE_SCCACHE" = true ]; then
    BUILD_ARGS+=" --secret id=aws-key-id,env=AWS_ACCESS_KEY_ID"
    BUILD_ARGS+=" --secret id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY"
 fi
+if [[ "$PLATFORM" == *"linux/arm64"* && "${FRAMEWORK}" == "SGLANG" ]]; then
+    # Add arguments required for sglang blackwell build
+    BUILD_ARGS+=" --build-arg GRACE_BLACKWELL=true --build-arg BUILD_TYPE=blackwell_aarch64"
+fi
 LATEST_TAG="--tag dynamo:latest-${FRAMEWORK,,}"
 if [ -n "${TARGET}" ] && [ "${TARGET}" != "local-dev" ]; then
    LATEST_TAG="${LATEST_TAG}-${TARGET}"