feat: bump sglang to `0.5.6.post2` and swap to upstream runtime container (#4762)

Signed-off-by: Dillon Cullinan <dcullinan@nvidia.com>
Signed-off-by: Dmitry Tokarev <dtokarev@nvidia.com>
Co-authored-by: Dillon Cullinan <dcullinan@nvidia.com>
Co-authored-by: Dmitry Tokarev <dtokarev@nvidia.com>

feat: bump sglang to `0.5.6.post2` and swap to upstream runtime container (#4762)
Signed-off-by: Dillon Cullinan <dcullinan@nvidia.com>
Signed-off-by: Dmitry Tokarev <dtokarev@nvidia.com>
Co-authored-by: Dillon Cullinan <dcullinan@nvidia.com>
Co-authored-by: Dmitry Tokarev <dtokarev@nvidia.com>
3e0459fb · ishandhanani · GitHub · f4245c99 · 3e0459fb · 3e0459fb
Unverified Commit 3e0459fb authored Dec 19, 2025 by ishandhanani Committed by GitHub Dec 19, 2025
8 changed files
--- a/.github/actions/docker-build/action.yml
+++ b/.github/actions/docker-build/action.yml
@@ -164,8 +164,10 @@ runs:
        # Run the sanity check script inside the container
        # The script is located in /workspace/deploy/sanity_check.py in runtime containers
+        export WORKSPACE=/workspace
        set +e
-        docker run --rm "$IMAGE_TAG" python /workspace/deploy/sanity_check.py --runtime-check --no-gpu-check
+        docker run --rm "$IMAGE_TAG" python ${WORKSPACE}/deploy/sanity_check.py --runtime-check --no-gpu-check
        SANITY_CHECK_EXIT_CODE=$?
        set -e
        if [ ${SANITY_CHECK_EXIT_CODE} -ne 0 ]; then

--- a/benchmarks/pyproject.toml
+++ b/benchmarks/pyproject.toml
@@ -46,7 +46,7 @@ dependencies = [
    "pydantic>=2",
    "tabulate",
    "types-tabulate",
-    # Satisfies vLLM 0.11.0 (>=4.55.2), vLLM 0.11.2 (>=4.56.0,<5), TRT-LLM 1.2.0rc5 (==4.56.0), SGLang 0.5.6 (==4.57.1)
+    # Satisfies vLLM 0.11.0 (>=4.55.2), vLLM 0.11.2 (>=4.56.0,<5), TRT-LLM 1.2.0rc5 (==4.56.0), SGLang 0.5.6.post2 (==4.57.1)
    "transformers>=4.56.0,<=4.57.1",
    "pytest-mypy",
 ]

--- a/container/Dockerfile.sglang
+++ b/container/Dockerfile.sglang
-# syntax=docker/dockerfile:1.10.0
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
@@ -19,7 +18,7 @@
 # properly without needing slow chown -R operations (which can add 2-10 extra
 # minutes).
 #
-# DEVELOPMENT PATHS THAT MUST BE GROUP-WRITABLE (for non-virtualenv containers):
+# DEVELOPMENT PATHS THAT MUST BE GROUP-WRITABLE (for virtualenv containers):
 #   /workspace            - Users create/modify project files
 #   /home/dynamo          - Users create config/cache files
 #   /home/dynamo/.local   - SGLang uses $HOME/.local/lib/python3.10/site-packages for pip install
@@ -31,23 +30,19 @@
 # This section contains build arguments that are common and shared with
 # the plain Dockerfile, so they should NOT have a default. The source of truth is from build.sh.
 ARG BASE_IMAGE
 ARG BASE_IMAGE_TAG
-ARG FRAMEWORK_IMAGE
-ARG FRAMEWORK_IMAGE_TAG
 ARG PYTHON_VERSION
 ARG ENABLE_KVBM
 ARG ENABLE_MEDIA_NIXL
 ARG ENABLE_MEDIA_FFMPEG
 ARG CARGO_BUILD_JOBS
-ARG CUDA_VERSION
-ARG ARCH=amd64
+ARG RUNTIME_IMAGE="lmsysorg/sglang"
-ARG ARCH_ALT=x86_64
+ARG RUNTIME_IMAGE_TAG="v0.5.6.post2-runtime"
-# sccache configuration - inherit from base build
+# SCCACHE configuration
 ARG USE_SCCACHE
 ARG SCCACHE_BUCKET=""
 ARG SCCACHE_REGION=""
@@ -58,6 +53,21 @@ ARG NIXL_REF
 ARG NIXL_GDRCOPY_REF
 ARG NIXL_LIBFABRIC_REF
+# Define general architecture ARGs for supporting both x86 and aarch64 builds.
+#   ARCH: Used for package suffixes (e.g., amd64, arm64)
+#   ARCH_ALT: Used for Rust targets, manylinux suffix (e.g., x86_64, aarch64)
+#
+# Default values are for x86/amd64:
+#   --build-arg ARCH=amd64 --build-arg ARCH_ALT=x86_64
+#
+# For arm64/aarch64, build with:
+#   --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64
+#
+# NOTE: There isn't an easy way to define one of these values based on the other value
+# without adding if statements everywhere, so just define both as ARGs for now.
+ARG ARCH=amd64
+ARG ARCH_ALT=x86_64
 ##################################
 ########## Base Image ############
 ##################################
@@ -217,8 +227,7 @@ RUN if [ "$USE_SCCACHE" = "true" ]; then \
 # Set SCCACHE environment variables
 ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \
-    SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}} \
+    SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}}
-    RUSTC_WRAPPER=${USE_SCCACHE:+sccache}
 # Build FFmpeg from source
 # Do not delete the source tarball for legal reasons
@@ -415,304 +424,41 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    fi && \
    /tmp/use-sccache.sh show-stats "Dynamo"
-########################################################
+##################################
-########## Framework Development Image ################
+########## Runtime Image #########
-########################################################
+##################################
-#
-# PURPOSE: Framework development and SGLang/DeepEP/NVSHMEM compilation
+FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime
-#
-# This stage builds and compiles framework dependencies including:
+# cleanup unnecessary libs
-# - SGLang inference engine with CUDA support
+RUN apt remove -y python3-apt &&\
-# - DeepEP and NVSHMEM
+    pip uninstall -y termplotlib
-# - All necessary build tools and compilation dependencies
-# - Framework-level Python packages and extensions
+# Re-declare RUNTIME_IMAGE_TAG inside this stage; it is used below to derive SGLANG_VERSION
-#
+ARG RUNTIME_IMAGE_TAG
-# Use this stage when you need to:
+WORKDIR /workspace
-# - Build SGLang from source with custom modifications
-# - Develop or debug framework-level components
+# Install NATS and ETCD
-# - Create custom builds with specific optimization flags
+COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
-#
+COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
-#FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu24.04 AS framework
-FROM ${FRAMEWORK_IMAGE}:${FRAMEWORK_IMAGE_TAG} AS framework
+ENV PATH=/usr/local/bin/etcd:$PATH
-# Declare all ARGs
-ARG BUILD_TYPE=all
-ARG DEEPEP_COMMIT=9af0e0d0e74f3577af1979c9b9e1ac2cad0104ee
-ARG DEEPEP_GB_COMMIT=1b14ad661c7640137fcfe93cccb2694ede1220b0
-ARG CMAKE_BUILD_PARALLEL_LEVEL=2
-ARG SGL_KERNEL_VERSION=0.3.16.post5
-ARG SGLANG_COMMIT=0.5.6
-ARG GDRCOPY_COMMIT=v2.4.4
-ARG NVSHMEM_VERSION=3.3.9
-ARG GRACE_BLACKWELL=false
-ARG ARCH
-ARG ARCH_ALT
-ARG PYTHON_VERSION
-ARG CARGO_BUILD_JOBS
-ARG CUDA_VERSION
-# Set all environment variables
-ENV DEBIAN_FRONTEND=noninteractive \
-    TZ=America/Los_Angeles \
-    CUDA_HOME=/usr/local/cuda \
-    GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/ \
-    NVSHMEM_DIR=/sgl-workspace/nvshmem/install \
-    PATH="${PATH}:/usr/local/nvidia/bin" \
-    LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/nvidia/lib:/usr/local/nvidia/lib64" \
-    LANG=en_US.UTF-8 \
-    LANGUAGE=en_US:en \
-    LC_ALL=en_US.UTF-8
-# Combined: Python setup, locale, and all package installation
-RUN apt-get update \
-    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends software-properties-common \
-    && add-apt-repository ppa:deadsnakes/ppa -y \
-    && apt-get update \
-    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-        # Python (using other python versions as needed)
-        python${PYTHON_VERSION}-dev \
-        python${PYTHON_VERSION}-venv \
-        python${PYTHON_VERSION}-distutils \
-        python3-pip \
-        # Build essentials
-        build-essential \
-        cmake \
-        ninja-build \
-        ccache \
-        patchelf \
-        git \
-        git-lfs \
-        # Core system utilities
-        tzdata \
-        locales \
-        ca-certificates \
-        dkms \
-        kmod \
-        # Command line tools
-        wget \
-        curl \
-        jq \
-        unzip \
-        # Network utilities
-        netcat-openbsd \
-        # SSL and pkg-config
-        libssl-dev \
-        pkg-config \
-        # MPI and NUMA
-        libopenmpi-dev \
-        libnuma1 \
-        libnuma-dev \
-        numactl \
-        # InfiniBand/RDMA
-        libibverbs-dev \
-        libibverbs1 \
-        libibumad3 \
-        librdmacm1 \
-        libnl-3-200 \
-        libnl-route-3-200 \
-        libnl-route-3-dev \
-        libnl-3-dev \
-        ibverbs-providers \
-        infiniband-diags \
-        perftest \
-        # Development libraries
-        libgoogle-glog-dev \
-        libgtest-dev \
-        libjsoncpp-dev \
-        libunwind-dev \
-        libboost-all-dev \
-        libgrpc-dev \
-        libgrpc++-dev \
-        libprotobuf-dev \
-        protobuf-compiler \
-        protobuf-compiler-grpc \
-        pybind11-dev \
-        libhiredis-dev \
-        libcurl4-openssl-dev \
-        libczmq4 \
-        libczmq-dev \
-        libfabric-dev \
-        # Package building tools
-        devscripts \
-        debhelper \
-        fakeroot \
-        check \
-        libsubunit0 \
-        libsubunit-dev \
-    # Set Python alternatives
-    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
-    && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \
-    && update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 1 \
-    && update-alternatives --set python /usr/bin/python${PYTHON_VERSION} \
-    # Set up locale
-    && locale-gen en_US.UTF-8 \
-    # Cleanup
-    && rm -rf /var/lib/apt/lists/* \
-    && apt-get clean
-WORKDIR /sgl-workspace
-# GDRCopy installation
-RUN git clone --depth 1 --branch ${GDRCOPY_COMMIT} https://github.com/NVIDIA/gdrcopy.git \
-    && cd gdrcopy/packages \
-    && export CUDA=${CUDA_HOME} \
-    && ./build-deb-packages.sh \
-    && dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb
-# Fix DeepEP IBGDA symlink
-RUN ln -sf /usr/lib/$(uname -m)-linux-gnu/libmlx5.so.1 /usr/lib/$(uname -m)-linux-gnu/libmlx5.so
 # Create dynamo user with group 0 for OpenShift compatibility
 RUN userdel -r ubuntu > /dev/null 2>&1 || true \
    && useradd -m -s /bin/bash -g 0 dynamo \
    && [ `id -u dynamo` -eq 1000 ] \
-    && mkdir -p /workspace /home/dynamo/.cache /opt/dynamo \
+    && mkdir -p /home/dynamo/.cache /opt/dynamo \
    # Non-recursive chown - only the directories themselves, not contents
-    && chown dynamo:0 /sgl-workspace /workspace /home/dynamo /home/dynamo/.cache /opt/dynamo \
+    && chown dynamo:0 /home/dynamo /home/dynamo/.cache /opt/dynamo /workspace \
    # No chmod needed: umask 002 handles new files, COPY --chmod handles copied content
    # Set umask globally for all subsequent RUN commands (must be done as root before USER dynamo)
    # NOTE: Setting ENV UMASK=002 does NOT work - umask is a shell builtin, not an environment variable
    && mkdir -p /etc/profile.d && echo 'umask 002' > /etc/profile.d/00-umask.sh
 USER dynamo
-ENV HOME=/home/dynamo
+# Copy attribution files
-# This picks up the umask 002 from the /etc/profile.d/00-umask.sh file for subsequent RUN commands
+COPY --chmod=664 --chown=dynamo:0 ATTRIBUTION* LICENSE /workspace/
-SHELL ["/bin/bash", "-l", "-o", "pipefail", "-c"]
-# Install SGLang (requires CUDA 12.8.1 or 12.9.1). Note that when system-wide packages is not writable,
-# so it gets installed to ~/.local/lib/python<version>/site-packages.
-RUN python3 -m pip install --no-cache-dir --ignore-installed pip==25.3 setuptools==80.9.0 wheel==0.45.1 html5lib==1.1 six==1.17.0 \
-    && git clone --depth 1 --branch v${SGLANG_COMMIT} https://github.com/sgl-project/sglang.git \
-    && cd sglang \
-    && case "$CUDA_VERSION" in \
-        12.8.1) CUINDEX=128 ;; \
-        12.9.1) CUINDEX=129 ;; \
-        *) echo "Error: Unsupported CUDA version for sglang: $CUDA_VERSION (requires 12.8.1 or 12.9.1)" && exit 1 ;; \
-    esac \
-    && python3 -m pip install --no-cache-dir sgl-kernel==${SGL_KERNEL_VERSION} \
-    && python3 -m pip install --no-cache-dir -e "python[${BUILD_TYPE}]" --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} \
-    && python3 -m pip install --no-cache-dir nvidia-nccl-cu12==2.27.6 --force-reinstall --no-deps \
-    && FLASHINFER_LOGGING_LEVEL=warning python3 -m flashinfer --download-cubin
-# Download and extract NVSHMEM source, clone DeepEP (use Tom's fork for GB200)
-RUN --mount=type=cache,target=/var/cache/curl,uid=1000,gid=0 \
-    curl --retry 3 --retry-delay 2 -fsSL -o /var/cache/curl/nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz https://developer.download.nvidia.com/compute/redist/nvshmem/${NVSHMEM_VERSION}/source/nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz \
-    && tar -xf /var/cache/curl/nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz \
-    && mv nvshmem_src nvshmem \
-    && rm -f /var/cache/curl/nvshmem_src_cuda12-all-all-${NVSHMEM_VERSION}.tar.gz \
-    && if [ "$GRACE_BLACKWELL" = true ]; then \
-        git clone --depth 1 https://github.com/fzyzcjy/DeepEP.git \
-        && cd DeepEP \
-        && git fetch --depth 1 origin ${DEEPEP_GB_COMMIT} \
-        && git checkout ${DEEPEP_GB_COMMIT}; \
-    else \
-        git clone --depth 1 https://github.com/deepseek-ai/DeepEP.git \
-        && cd DeepEP \
-        && git fetch --depth 1 origin ${DEEPEP_COMMIT} \
-        && git checkout ${DEEPEP_COMMIT}; \
-    fi \
-    && sed -i 's/#define NUM_CPU_TIMEOUT_SECS 100/#define NUM_CPU_TIMEOUT_SECS 1000/' csrc/kernels/configs.cuh
-# Build and install NVSHMEM library only (without python library)
-RUN cd /sgl-workspace/nvshmem && \
-    if [ "$GRACE_BLACKWELL" = true ]; then CUDA_ARCH="90;100;120"; else CUDA_ARCH="90"; fi && \
-    NVSHMEM_SHMEM_SUPPORT=0 \
-    NVSHMEM_UCX_SUPPORT=0 \
-    NVSHMEM_USE_NCCL=0 \
-    NVSHMEM_MPI_SUPPORT=0 \
-    NVSHMEM_IBGDA_SUPPORT=1 \
-    NVSHMEM_PMIX_SUPPORT=0 \
-    NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
-    NVSHMEM_USE_GDRCOPY=1 \
-    cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_DIR} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH} -DNVSHMEM_BUILD_PYTHON_LIB=OFF && \
-    cmake --build build --target install -j${CMAKE_BUILD_PARALLEL_LEVEL}
-# Build nvshmem4py wheels separately (Python 3.10, CUDA 12) to avoid building the python library twice for multiple python versions
-# Need to reconfigure with PYTHON_LIB=ON to add the nvshmem4py subdirectory
-RUN cd /sgl-workspace/nvshmem && \
-    if [ "$GRACE_BLACKWELL" = true ]; then CUDA_ARCH="90;100;120"; else CUDA_ARCH="90"; fi && \
-    NVSHMEM_SHMEM_SUPPORT=0 \
-    NVSHMEM_UCX_SUPPORT=0 \
-    NVSHMEM_USE_NCCL=0 \
-    NVSHMEM_MPI_SUPPORT=0 \
-    NVSHMEM_IBGDA_SUPPORT=1 \
-    NVSHMEM_PMIX_SUPPORT=0 \
-    NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
-    NVSHMEM_USE_GDRCOPY=1 \
-    cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_DIR} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH} -DNVSHMEM_BUILD_PYTHON_LIB=ON && \
-    cmake --build build --target build_nvshmem4py_wheel_cu12_${PYTHON_VERSION} -j${CMAKE_BUILD_PARALLEL_LEVEL}
-# Install DeepEP
-RUN cd /sgl-workspace/DeepEP && \
-    NVSHMEM_DIR=${NVSHMEM_DIR} TORCH_CUDA_ARCH_LIST="9.0;10.0" pip install --no-build-isolation .
-# Copy rust installation from dynamo_base to avoid duplication efforts
-# Pattern: COPY --chmod=775 <path>; RUN chmod g+w <path> because COPY --chmod only affects <path>/*, not <path>
-COPY --from=dynamo_base --chown=dynamo:0 --chmod=775 /usr/local/rustup /usr/local/rustup
-COPY --from=dynamo_base --chown=dynamo:0 --chmod=775 /usr/local/cargo /usr/local/cargo
-RUN chmod g+w /usr/local/rustup /usr/local/cargo
-ENV RUSTUP_HOME=/usr/local/rustup \
-    CARGO_HOME=/usr/local/cargo \
-    CARGO_TARGET_DIR=/workspace/target \
-    PATH=/usr/local/cargo/bin:$PATH \
-    CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
-# Install essential Python build tools
-RUN python3 -m pip install --no-cache-dir \
-    mooncake-transfer-engine==0.3.6.post1 \
-    scikit-build-core==0.11.6 \
-    setuptools-rust==1.12.0
-##################################################
-########## Runtime Image ########################
-##################################################
-#
-# PURPOSE: Production runtime environment
-#
-# This stage creates a production-ready image containing:
-# - Pre-compiled SGLang, DeepEP, and NVSHMEM components
-# - Dynamo runtime libraries and Python packages
-# - Essential runtime dependencies and configurations
-# - Optimized for inference workloads and deployment
-#
-# Use this stage when you need:
-# - Production deployment of Dynamo with SGLang + DeepEP
-# - Minimal runtime footprint without build tools
-# - Ready-to-run inference server environment
-#
-FROM framework AS runtime
-WORKDIR /workspace
-ARG ARCH
-ARG ARCH_ALT
-ARG PYTHON_VERSION
-ENV DYNAMO_HOME=/opt/dynamo
-ENV NVSHMEM_DIR=/sgl-workspace/nvshmem/install
-ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
-ENV NIXL_LIB_DIR=${NIXL_PREFIX}/lib/${ARCH_ALT}-linux-gnu
-ENV NIXL_PLUGIN_DIR=${NIXL_LIB_DIR}/plugins
-ENV LD_LIBRARY_PATH=\
-${NVSHMEM_DIR}/lib:\
-${NIXL_LIB_DIR}:\
-${NIXL_PLUGIN_DIR}:\
-/usr/local/ucx/lib:\
-/usr/local/ucx/lib/ucx:\
-/usr/local/nvidia/lib64:\
-${LD_LIBRARY_PATH}
-ENV NVIDIA_DRIVER_CAPABILITIES=video,compute,utility
-# Copy NATS and ETCD from dynamo_base, and UCX/NIXL from wheel_builder
-COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
-COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
-COPY --from=wheel_builder /usr/local/ucx /usr/local/ucx
-COPY --chown=dynamo: --from=wheel_builder $NIXL_PREFIX $NIXL_PREFIX
-COPY --chown=dynamo: --from=wheel_builder /opt/nvidia/nvda_nixl/lib64/. ${NIXL_LIB_DIR}/
-COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/wheelhouse/nixl/
-COPY --chown=dynamo: --from=wheel_builder /workspace/nixl/build/src/bindings/python/nixl-meta/nixl-*.whl /opt/dynamo/wheelhouse/nixl/
-ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:${HOME}/.local/bin:$PATH
 # Copy ffmpeg
 RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/local/ \
@@ -722,60 +468,56 @@ RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/loca
    cp -r /tmp/usr/local/src/ffmpeg /usr/local/src/; \
    true # in case ffmpeg not enabled
-# Install Dynamo wheels from dynamo_base wheelhouse
 # Pattern: COPY --chmod=775 <path>; chmod g+w <path> done later as root because COPY --chmod only affects <path>/*, not <path>
-COPY --chmod=775 --chown=dynamo:0 benchmarks/ /opt/dynamo/benchmarks/
+COPY --chmod=775 --chown=dynamo:0 benchmarks/ /workspace/benchmarks/
 COPY --chmod=775 --chown=dynamo:0 --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
-RUN python3 -m pip install \
+ENV SGLANG_VERSION="${RUNTIME_IMAGE_TAG%%-*}"
+RUN --mount=type=bind,source=.,target=/mnt/local_src \
+    pip install --no-cache-dir --break-system-packages \
        /opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
        /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
-    /opt/dynamo/wheelhouse/nixl/nixl*.whl \
+        sglang==${SGLANG_VERSION}
-    && cd /opt/dynamo/benchmarks \
-    && python3 -m pip install --no-cache . \
-    && cd - \
-    && rm -rf /opt/dynamo/benchmarks
 # Install common and test dependencies
-RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
+RUN --mount=type=bind,source=.,target=/mnt/local_src \
-    --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
+    pip install --no-cache-dir --break-system-packages \
-    python3 -m pip install \
+        --requirement /mnt/local_src/container/deps/requirements.txt \
-        --no-cache \
+        --requirement /mnt/local_src/container/deps/requirements.test.txt \
-        --requirement /tmp/requirements.txt \
+        sglang==${SGLANG_VERSION} && \
-        --requirement /tmp/requirements.test.txt
+    cd /workspace/benchmarks && \
+    pip install --break-system-packages --no-cache . && \
-## Copy attribution files and launch banner with correct ownership
+    # pip/uv bypasses umask when creating .egg-info files, but chmod -R is fast here (small directory)
-COPY --chmod=664 --chown=dynamo:0 ATTRIBUTION* LICENSE /workspace/
+    chmod -R g+w /workspace/benchmarks && \
+    # Install NVIDIA packages that are needed for DeepEP to work properly
-# Copy tests, benchmarks, deploy and components for CI with correct ownership
+    # This is done in the upstream runtime image too, but we overrode these packages earlier
+    pip install --no-cache-dir --break-system-packages --force-reinstall --no-deps \
+        nvidia-nccl-cu12==2.28.3 \
+        nvidia-cudnn-cu12==9.16.0.29 \
+        nvidia-cutlass-dsl==4.3.0
+# Copy tests, deploy and components for CI with correct ownership
 # Pattern: COPY --chmod=775 <path>; chmod g+w <path> done later as root because COPY --chmod only affects <path>/*, not <path>
 COPY --chmod=775 --chown=dynamo:0 tests /workspace/tests
 COPY --chmod=775 --chown=dynamo:0 examples /workspace/examples
-COPY --chmod=775 --chown=dynamo:0 benchmarks /workspace/benchmarks
 COPY --chmod=775 --chown=dynamo:0 deploy /workspace/deploy
 COPY --chmod=775 --chown=dynamo:0 components/ /workspace/components/
 COPY --chmod=775 --chown=dynamo:0 recipes/ /workspace/recipes/
-# Setup launch banner in common directory accessible to all users
+# Enable forceful shutdown of inflight requests
-RUN --mount=type=bind,source=./container/launch_message/runtime.txt,target=/opt/dynamo/launch_message.txt \
+ENV SGLANG_FORCE_SHUTDOWN=1
-    sed '/^#\s/d' /opt/dynamo/launch_message.txt > /opt/dynamo/.launch_screen
-# Setup environment for all users
+# Our scripting assumes /workspace is where dynamo is located.
+# To keep sglang and dynamo reachable from the same workspace,
+# create /sgl-workspace/dynamo as a symlink that points to /workspace.
 USER root
-# Fix directory permissions: COPY --chmod only affects contents, not the directory itself
+RUN ln -s /workspace /sgl-workspace/dynamo
-RUN chmod g+w /workspace /workspace/* /opt/dynamo /opt/dynamo/* && \
-    chown dynamo:0 /workspace /opt/dynamo/ && \
-    chmod 755 /opt/dynamo/.launch_screen && \
-    echo 'source /opt/dynamo/venv/bin/activate' >> /etc/bash.bashrc && \
-    echo 'cat /opt/dynamo/.launch_screen' >> /etc/bash.bashrc
 USER dynamo
-# Copy tests, benchmarks, deploy and components for CI with correct ownership
 ARG DYNAMO_COMMIT_SHA
-ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA
+ENV DYNAMO_COMMIT_SHA=${DYNAMO_COMMIT_SHA}
-ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
+ENV PATH=/home/dynamo/.local/bin:$PATH
-CMD []
 ###########################################################
 ########## Development (run.sh, runs as root user) ########
@@ -792,98 +534,63 @@ CMD []
 FROM runtime AS dev
-ARG WORKSPACE_DIR=/sgl-workspace/dynamo
+# Don't want ubuntu to be editable, just change uid and gid.
-ARG PYTHON_VERSION
+ARG WORKSPACE_DIR=/workspace
-# NOTE: SGLang uses system Python (not a virtualenv in framework/runtime stages) to align with
-# upstream SGLang Dockerfile: https://github.com/sgl-project/sglang/blob/main/docker/Dockerfile
-# For dev stage, we create a lightweight venv with --system-site-packages to satisfy maturin develop
-# requirements while still accessing all system-installed packages (sglang, torch, deepep, etc.)
-COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
-RUN mkdir -p /opt/dynamo/venv && \
-    uv venv /opt/dynamo/venv --python $PYTHON_VERSION --system-site-packages
-ENV VIRTUAL_ENV=/opt/dynamo/venv \
-    PATH="/opt/dynamo/venv/bin:${PATH}"
 USER root
-# venv permissions are handled by umask 002 set earlier
+# Install utilities as root
-# Install development tools and utilities
 RUN apt-get update -y && \
    apt-get install -y --no-install-recommends  \
-    # System monitoring and debugging tools
+    # Install utilities
    nvtop \
-    htop \
-    gdb \
-    # Network and system utilities
    wget \
-    iproute2 \
+    tmux \
-    net-tools \
+    vim \
+    git \
    openssh-client \
+    iproute2 \
    rsync \
-    lsof \
-    # File and archive utilities
    zip \
-    tree \
+    unzip \
-    # Development and build tools
+    htop \
-    vim \
+    # Build Dependencies
-    tmux \
-    git \
-    git-lfs \
    autoconf \
    automake \
    cmake \
    libtool \
    meson \
-    bear \
+    net-tools \
-    ccache \
+    pybind11-dev \
-    less \
+    # Rust build dependencies
-    pkg-config \
-    # Language and development support
    clang \
    libclang-dev \
-    # Shell and productivity tools
+    protobuf-compiler \
-    zsh \
+    pkg-config && \
-    silversearcher-ag \
-    cloc \
-    locales \
-    # sudo for dev stage
-    sudo \
-    # NVIDIA tools dependencies
-    gnupg && \
-    echo "deb https://developer.download.nvidia.com/devtools/repos/ubuntu2004/amd64 /" | tee /etc/apt/sources.list.d/nvidia-devtools.list && \
-    apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub && \
-    apt-get update -y && \
-    apt-get install -y nsight-systems-cli && \
    rm -rf /var/lib/apt/lists/*
-# Install clang-format and clangd
+# Set umask for group-writable files in dev stage (runs as root)
-RUN curl --retry 3 --retry-delay 2 -LSso /usr/local/bin/clang-format https://github.com/muttleyxd/clang-tools-static-binaries/releases/download/master-32d3ac78/clang-format-16_linux-amd64 \
+RUN mkdir -p /etc/profile.d && echo 'umask 002' > /etc/profile.d/00-umask.sh
-    && chmod +x /usr/local/bin/clang-format \
+SHELL ["/bin/bash", "-l", "-o", "pipefail", "-c"]
-    && curl --retry 3 --retry-delay 2 -L https://github.com/clangd/clangd/releases/download/18.1.3/clangd-linux-18.1.3.zip -o clangd.zip \
-    && unzip clangd.zip \
+# Set workspace directory variable
-    && cp -r clangd_18.1.3/bin/* /usr/local/bin/ \
+ENV WORKSPACE_DIR=${WORKSPACE_DIR} \
-    && cp -r clangd_18.1.3/lib/* /usr/local/lib/ \
+    DYNAMO_HOME=${WORKSPACE_DIR} \
-    && rm -rf clangd_18.1.3 clangd.zip
+    RUSTUP_HOME=/usr/local/rustup \
+    CARGO_HOME=/usr/local/cargo \
+    CARGO_TARGET_DIR=/workspace/target \
+    PATH=/usr/local/cargo/bin:$PATH
+# Copy rust installation from dynamo_base to avoid duplication efforts
+# Pattern: COPY --chmod=775 <path>; chmod g+w <path> because COPY --chmod only affects <path>/*, not <path>
+COPY --from=dynamo_base --chmod=775 /usr/local/rustup /usr/local/rustup
+COPY --from=dynamo_base --chmod=775 /usr/local/cargo /usr/local/cargo
+RUN chmod g+w /usr/local/rustup /usr/local/cargo
+# Install maturin, for maturin develop
 # Editable install of dynamo
-COPY --chmod=664 pyproject.toml README.md hatch_build.py /workspace/
+COPY pyproject.toml README.md hatch_build.py /workspace/
-RUN python3 -m pip install --no-deps -e .
+RUN pip install maturin[patchelf] && \
+    pip install --no-deps -e .
-# Install Python development packages
-RUN python3 -m pip install --no-cache-dir \
-    maturin[patchelf] \
-    pytest \
-    black \
-    isort \
-    icdiff \
-    scikit_build_core \
-    uv \
-    pre-commit \
-    pandas \
-    matplotlib \
-    tabulate
 ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
 CMD []
\ No newline at end of file
--- a/container/Dockerfile.sglang-wideep
+++ b/container/Dockerfile.sglang-wideep
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-ARG SGLANG_IMAGE_TAG="v0.5.3.post2"
-ARG BRANCH_TYPE
-ARG CARGO_BUILD_JOBS
-FROM scratch AS local_src
-COPY . /src
-FROM lmsysorg/sglang:${SGLANG_IMAGE_TAG}
-WORKDIR /sgl-workspace
-ARG DYNAMO_COMMIT_SHA
-ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA
-# Install jq for JSON processing
-RUN apt-get update -y \
-    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-        jq \
-    && apt-get clean \
-    && rm -rf /var/lib/apt/lists/*
-# Install dynamo
-# Providing --build-arg BRANCH_TYPE=local will editable install the local dynamo repo
-# Providing --build-arg BRANCH_TYPE=remote will editable install the remote dynamo repo
-# Default is to install the latest published dynamo version
-ARG BRANCH_TYPE
-ARG CARGO_BUILD_JOBS
-COPY --from=local_src /src /tmp/local_src
-RUN if [ "$BRANCH_TYPE" = "local" ]; then \
-        cp -r /tmp/local_src /sgl-workspace/dynamo; \
-    elif [ "$BRANCH_TYPE" = "remote" ]; then \
-        git clone https://github.com/ai-dynamo/dynamo.git /sgl-workspace/dynamo; \
-    fi
-ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
-# SGLang does not use a venv in their container
-RUN if [ "$BRANCH_TYPE" = "local" ]; then \
-    cd dynamo/lib/bindings/python && \
-    pip install --break-system-packages maturin && \
-    maturin build --release && \
-    pip install --break-system-packages target/wheels/*.whl && \
-    cd /sgl-workspace/dynamo && \
-    pip install --break-system-packages -e . && \
-    pip install --break-system-packages --requirement /tmp/local_src/container/deps/requirements.txt ; \
-  elif [ "$BRANCH_TYPE" = "remote" ]; then \
-    cd dynamo/lib/bindings/python && \
-    pip install --break-system-packages maturin && \
-    maturin build --release && \
-    pip install --break-system-packages target/wheels/*.whl && \
-    cd /sgl-workspace/dynamo && \
-    pip install --break-system-packages -e . && \
-    pip install --break-system-packages --requirement /sgl-workspace/dynamo/container/deps/requirements.txt ; \
-  else \
-    pip install --break-system-packages ai-dynamo ; \
-  fi \
-&& rm -rf /tmp/local_src
-# Install NATS and ETCD
-RUN case "$(uname -m)" in \
-      x86_64) ARCH=amd64 ;; \
-      aarch64) ARCH=arm64 ;; \
-      *) echo "Unsupported architecture: $(uname -m)" && exit 1 ;; \
-    esac && \
-    wget --tries=3 --waitretry=5 \
-      https://github.com/nats-io/nats-server/releases/download/v2.10.28/\
-nats-server-v2.10.28-${ARCH}.deb && \
-    dpkg -i nats-server-v2.10.28-${ARCH}.deb && rm nats-server-v2.10.28-${ARCH}.deb
-ENV ETCD_VERSION="v3.5.21"
-RUN case "$(uname -m)" in \
-      x86_64) ARCH=amd64 ;; \
-      aarch64) ARCH=arm64 ;; \
-      *) echo "Unsupported architecture: $(uname -m)" && exit 1 ;; \
-    esac && \
-    wget --tries=3 --waitretry=5 \
-      https://github.com/etcd-io/etcd/releases/download/${ETCD_VERSION}/\
-etcd-${ETCD_VERSION}-linux-${ARCH}.tar.gz -O /tmp/etcd.tar.gz && \
-    mkdir -p /usr/local/bin/etcd && \
-    tar -xzf /tmp/etcd.tar.gz \
-        -C /usr/local/bin/etcd --strip-components=1 && \
-    rm /tmp/etcd.tar.gz
-ENV PATH=/usr/local/bin/etcd:$PATH
-# Enable forceful shutdown of inflight requests
-ENV SGL_FORCE_SHUTDOWN=1
-WORKDIR /sgl-workspace/dynamo/examples/backends/sglang
--- a/container/build.sh
+++ b/container/build.sh
@@ -115,13 +115,13 @@ VLLM_RUNTIME_IMAGE_TAG_CU13="13.0.2-runtime-ubuntu24.04"
 NONE_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
 NONE_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
-SGLANG_CUDA_VERSION="12.9.1"
-# This is for Dockerfile
 SGLANG_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
-SGLANG_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
+SGLANG_BASE_IMAGE_TAG="25.06-cuda12.9-devel-ubuntu24.04"
-# This is for Dockerfile.sglang. Unlike the other frameworks, it is using a different base image
+SGLANG_CUDA_VERSION="12.9.1"
-SGLANG_FRAMEWORK_IMAGE="nvcr.io/nvidia/cuda"
+SGLANG_PYTHON_VERSION="3.10"
-SGLANG_FRAMEWORK_IMAGE_TAG="${SGLANG_CUDA_VERSION}-cudnn-devel-ubuntu24.04"
+PYTHON_VERSION="3.12"
 NIXL_REF=0.8.0
 NIXL_UCX_REF=v1.20.0-rc1
@@ -909,13 +909,10 @@ fi
 if [[ $FRAMEWORK == "SGLANG" ]]; then
    echo "Customizing Python, CUDA, and framework images for sglang images"
-    BUILD_ARGS+=" --build-arg PYTHON_VERSION=3.10"
+    BUILD_ARGS+=" --build-arg PYTHON_VERSION=${SGLANG_PYTHON_VERSION}"
    BUILD_ARGS+=" --build-arg CUDA_VERSION=${SGLANG_CUDA_VERSION}"
-    # Unlike the other two frameworks, SGLang's framework image is different from the base image, so we need to set it explicitly.
-    BUILD_ARGS+=" --build-arg FRAMEWORK_IMAGE=${SGLANG_FRAMEWORK_IMAGE}"
-    BUILD_ARGS+=" --build-arg FRAMEWORK_IMAGE_TAG=${SGLANG_FRAMEWORK_IMAGE_TAG}"
 else
-    BUILD_ARGS+=" --build-arg PYTHON_VERSION=3.12"
+    BUILD_ARGS+=" --build-arg PYTHON_VERSION=${PYTHON_VERSION}"
 fi
 # Add sccache build arguments
 if [ "$USE_SCCACHE" = true ]; then

--- a/container/deps/requirements.txt
+++ b/container/deps/requirements.txt
@@ -19,7 +19,7 @@ av==15.0.0
 fastapi==0.120.1
 ftfy==6.3.1
 genai-perf==0.0.15
-grpcio-tools<=1.66.0  # May have platform-specific builds
+grpcio-tools<=1.76.0  # May have platform-specific builds
 httpx==0.28.1
 kr8s==0.20.13
 kubernetes==32.0.1
@@ -38,7 +38,7 @@ pre-commit==4.5.0
 prometheus-api-client==0.6.0
 prometheus_client==0.23.1
 prophet==1.2.1
-protobuf==5.29.5
+protobuf>=5.29.5,<7.0.0
 pydantic>=2.11.4,<2.13  # vllm==0.12.0 depends on pydantic>=2.12.0
 pyright==1.1.407
 PyYAML==6.0.3
@@ -48,12 +48,12 @@ sentencepiece==0.2.1
 # Required by kr8s
 # https://github.com/kr8s-org/kr8s/blob/750022c3ebbb7988cddb5a979aca2ee8074a1069/examples/kubectl-ng/uv.lock#L988
 sniffio==1.3.1
-tensorboard==2.19.0
+tensorboard>=2.19.0,<2.21.0
 tensorboardX==2.6.2.2
 # Transformers version constraint for container builds
 # - vLLM 0.11.0: >=4.55.2, vLLM 0.11.2: >=4.56.0,<5
 # - TensorRT-LLM 1.2.0rc5: ==4.56.0
-# - SGLang 0.5.6: ==4.57.1
+# - SGLang 0.5.6.post2: ==4.57.1
 # Using >=4.56.0 and <=4.57.1 to satisfy all frameworks
 transformers>=4.56.0,<=4.57.1
 types-aiofiles==25.1.0.20251011

--- a/docs/reference/support-matrix.md
+++ b/docs/reference/support-matrix.md
@@ -62,7 +62,7 @@ The following table shows the dependency versions included with each Dynamo rele
 | **Dependency** | **main (ToT)** | **v0.8.0 (unreleased)** | **v0.7.1** | **v0.7.0.post1** | **v0.7.0** |
 | :------------- | :------------- | :---------------------- | :--------- | :--------------- | :--------- |
-| SGLang         | 0.5.6          | 0.5.6.post1             | 0.5.3.post4| 0.5.3.post4      | 0.5.3.post4|
+| SGLang         | 0.5.6.post2    | 0.5.6.post2             | 0.5.3.post4| 0.5.3.post4      | 0.5.3.post4|
 | TensorRT-LLM   | 1.2.0rc5       | 1.2.0rc6                | 1.2.0rc3   | 1.2.0rc3         | 1.2.0rc2   |
 | vLLM           | 0.12.0         | 0.12.0                  | 0.11.0     | 0.11.0           | 0.11.0     |
 | NIXL           | 0.8.0          | 0.8.0                   | 0.8.0      | 0.8.0            | 0.8.0      |

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -61,8 +61,8 @@ vllm = [
 sglang = [
    "uvloop",
+    "sglang==0.5.6.post2",
    "nixl[cu12]<=0.8.0",
-    "sglang==0.5.6",
 ]
 [project.entry-points.pytest11]