Unverified Commit 3e0459fb authored by ishandhanani, committed by GitHub
Browse files

feat: bump sglang to `0.5.6.post2` and swap to upstream runtime container (#4762)


Signed-off-by: Dillon Cullinan <dcullinan@nvidia.com>
Signed-off-by: Dmitry Tokarev <dtokarev@nvidia.com>
Co-authored-by: Dillon Cullinan <dcullinan@nvidia.com>
Co-authored-by: Dmitry Tokarev <dtokarev@nvidia.com>
parent f4245c99
......@@ -164,8 +164,10 @@ runs:
# Run the sanity check script inside the container
# The script is located in /workspace/deploy/sanity_check.py in runtime containers
export WORKSPACE=/workspace
set +e
docker run --rm "$IMAGE_TAG" python /workspace/deploy/sanity_check.py --runtime-check --no-gpu-check
docker run --rm "$IMAGE_TAG" python ${WORKSPACE}/deploy/sanity_check.py --runtime-check --no-gpu-check
SANITY_CHECK_EXIT_CODE=$?
set -e
if [ ${SANITY_CHECK_EXIT_CODE} -ne 0 ]; then
......
......@@ -46,7 +46,7 @@ dependencies = [
"pydantic>=2",
"tabulate",
"types-tabulate",
# Satisfies vLLM 0.11.0 (>=4.55.2), vLLM 0.11.2 (>=4.56.0,<5), TRT-LLM 1.2.0rc5 (==4.56.0), SGLang 0.5.6 (==4.57.1)
# Satisfies vLLM 0.11.0 (>=4.55.2), vLLM 0.11.2 (>=4.56.0,<5), TRT-LLM 1.2.0rc5 (==4.56.0), SGLang 0.5.6.post2 (==4.57.1)
"transformers>=4.56.0,<=4.57.1",
"pytest-mypy",
]
......
This diff is collapsed.
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# Tag of the upstream lmsysorg/sglang image used as the base of the final stage.
ARG SGLANG_IMAGE_TAG="v0.5.3.post2"
# These two are declared before the first FROM, which places them outside any
# build stage; they are declared again inside the final stage before the RUN
# steps that read them.
ARG BRANCH_TYPE
ARG CARGO_BUILD_JOBS
# Helper stage that holds a copy of the build context under /src.
FROM scratch AS local_src
COPY . /src
# Final image, built on top of the upstream SGLang container.
FROM lmsysorg/sglang:${SGLANG_IMAGE_TAG}
WORKDIR /sgl-workspace
# Expose the DYNAMO_COMMIT_SHA build argument as an environment variable in the image.
ARG DYNAMO_COMMIT_SHA
ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA
# jq is installed for JSON processing. The apt caches and package lists are
# removed in the same layer so they do not end up in the image.
RUN apt-get update -y && \
    DEBIAN_FRONTEND=noninteractive \
      apt-get install -y --no-install-recommends jq && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Install dynamo: stage the sources selected by --build-arg BRANCH_TYPE.
#   local  -> the local dynamo repo (build context) is copied to /sgl-workspace/dynamo
#             and editable-installed by the following step
#   remote -> the remote dynamo repo is cloned to /sgl-workspace/dynamo
#             and editable-installed by the following step
#   unset/other -> nothing is staged; the latest published dynamo version is installed
ARG BRANCH_TYPE
ARG CARGO_BUILD_JOBS
COPY --from=local_src /src /tmp/local_src
RUN case "$BRANCH_TYPE" in \
      local) \
        cp -r /tmp/local_src /sgl-workspace/dynamo ;; \
      remote) \
        git clone https://github.com/ai-dynamo/dynamo.git /sgl-workspace/dynamo ;; \
    esac
# Cargo build parallelism; falls back to 16 when the build argument is not provided.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
# SGLang does not use a venv in their container, hence --break-system-packages
# on every pip call.
#   local / remote: build the Python bindings wheel with maturin, install it,
#                   editable-install dynamo, then install the container requirements
#                   (read from the staged copy for local, from the clone for remote).
#   anything else:  install the published ai-dynamo package.
# The staged /tmp/local_src copy is removed afterwards in every case.
RUN case "$BRANCH_TYPE" in \
      local)  REQUIREMENTS=/tmp/local_src/container/deps/requirements.txt ;; \
      remote) REQUIREMENTS=/sgl-workspace/dynamo/container/deps/requirements.txt ;; \
      *)      REQUIREMENTS= ;; \
    esac && \
    if [ -z "$REQUIREMENTS" ]; then \
      pip install --break-system-packages ai-dynamo ; \
    else \
      cd dynamo/lib/bindings/python && \
      pip install --break-system-packages maturin && \
      maturin build --release && \
      pip install --break-system-packages target/wheels/*.whl && \
      cd /sgl-workspace/dynamo && \
      pip install --break-system-packages -e . && \
      pip install --break-system-packages --requirement "$REQUIREMENTS" ; \
    fi && \
    rm -rf /tmp/local_src
# Install NATS and ETCD (the etcd step follows this one).
# NATS server release to install. Declared once so the download URL and the
# package file name cannot drift apart; override with
# --build-arg NATS_VERSION=vX.Y.Z.
ARG NATS_VERSION="v2.10.28"
# Map the kernel architecture name to the one used in NATS release assets,
# download the matching .deb to /tmp, install it, and delete the package file.
# The URL is kept on a single quoted line: splitting it with a backslash-newline
# breaks the download as soon as the continuation line is indented.
RUN case "$(uname -m)" in \
      x86_64)  ARCH=amd64 ;; \
      aarch64) ARCH=arm64 ;; \
      *) echo "Unsupported architecture: $(uname -m)" && exit 1 ;; \
    esac && \
    wget --tries=3 --waitretry=5 \
      "https://github.com/nats-io/nats-server/releases/download/${NATS_VERSION}/nats-server-${NATS_VERSION}-${ARCH}.deb" \
      -O /tmp/nats-server.deb && \
    dpkg -i /tmp/nats-server.deb && \
    rm /tmp/nats-server.deb
# etcd release to download; also visible in the image environment.
ENV ETCD_VERSION="v3.5.21"
# Fetch the etcd release tarball matching the CPU architecture and unpack it,
# with the top-level directory stripped, into /usr/local/bin/etcd.
RUN MACHINE="$(uname -m)" && \
    if [ "$MACHINE" = "x86_64" ]; then \
      ARCH=amd64 ; \
    elif [ "$MACHINE" = "aarch64" ]; then \
      ARCH=arm64 ; \
    else \
      echo "Unsupported architecture: $MACHINE" && exit 1 ; \
    fi && \
    ETCD_URL="https://github.com/etcd-io/etcd/releases/download/${ETCD_VERSION}/etcd-${ETCD_VERSION}-linux-${ARCH}.tar.gz" && \
    ETCD_DIR=/usr/local/bin/etcd && \
    wget --tries=3 --waitretry=5 "$ETCD_URL" -O /tmp/etcd.tar.gz && \
    mkdir -p "$ETCD_DIR" && \
    tar -xzf /tmp/etcd.tar.gz -C "$ETCD_DIR" --strip-components=1 && \
    rm /tmp/etcd.tar.gz
# Put the unpacked etcd directory at the front of PATH.
ENV PATH=/usr/local/bin/etcd:$PATH
# Enable forceful shutdown of inflight requests
ENV SGL_FORCE_SHUTDOWN=1
# Default working directory for containers started from this image.
# NOTE(review): when BRANCH_TYPE is neither "local" nor "remote" no dynamo
# checkout is staged, so this path presumably gets created empty by WORKDIR —
# confirm that is intended.
WORKDIR /sgl-workspace/dynamo/examples/backends/sglang
......@@ -115,13 +115,13 @@ VLLM_RUNTIME_IMAGE_TAG_CU13="13.0.2-runtime-ubuntu24.04"
NONE_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
NONE_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
SGLANG_CUDA_VERSION="12.9.1"
# This is for Dockerfile
SGLANG_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
SGLANG_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
# This is for Dockerfile.sglang. Unlike the other frameworks, it is using a different base image
SGLANG_FRAMEWORK_IMAGE="nvcr.io/nvidia/cuda"
SGLANG_FRAMEWORK_IMAGE_TAG="${SGLANG_CUDA_VERSION}-cudnn-devel-ubuntu24.04"
SGLANG_BASE_IMAGE_TAG="25.06-cuda12.9-devel-ubuntu24.04"
SGLANG_CUDA_VERSION="12.9.1"
SGLANG_PYTHON_VERSION="3.10"
PYTHON_VERSION="3.12"
NIXL_REF=0.8.0
NIXL_UCX_REF=v1.20.0-rc1
......@@ -909,13 +909,10 @@ fi
if [[ $FRAMEWORK == "SGLANG" ]]; then
echo "Customizing Python, CUDA, and framework images for sglang images"
BUILD_ARGS+=" --build-arg PYTHON_VERSION=3.10"
BUILD_ARGS+=" --build-arg PYTHON_VERSION=${SGLANG_PYTHON_VERSION}"
BUILD_ARGS+=" --build-arg CUDA_VERSION=${SGLANG_CUDA_VERSION}"
# Unlike the other two frameworks, SGLang's framework image is different from the base image, so we need to set it explicitly.
BUILD_ARGS+=" --build-arg FRAMEWORK_IMAGE=${SGLANG_FRAMEWORK_IMAGE}"
BUILD_ARGS+=" --build-arg FRAMEWORK_IMAGE_TAG=${SGLANG_FRAMEWORK_IMAGE_TAG}"
else
BUILD_ARGS+=" --build-arg PYTHON_VERSION=3.12"
BUILD_ARGS+=" --build-arg PYTHON_VERSION=${PYTHON_VERSION}"
fi
# Add sccache build arguments
if [ "$USE_SCCACHE" = true ]; then
......
......@@ -19,7 +19,7 @@ av==15.0.0
fastapi==0.120.1
ftfy==6.3.1
genai-perf==0.0.15
grpcio-tools<=1.66.0 # May have platform-specific builds
grpcio-tools<=1.76.0 # May have platform-specific builds
httpx==0.28.1
kr8s==0.20.13
kubernetes==32.0.1
......@@ -38,7 +38,7 @@ pre-commit==4.5.0
prometheus-api-client==0.6.0
prometheus_client==0.23.1
prophet==1.2.1
protobuf==5.29.5
protobuf>=5.29.5,<7.0.0
pydantic>=2.11.4,<2.13 # vllm==0.12.0 depends on pydantic>=2.12.0
pyright==1.1.407
PyYAML==6.0.3
......@@ -48,12 +48,12 @@ sentencepiece==0.2.1
# Required by kr8s
# https://github.com/kr8s-org/kr8s/blob/750022c3ebbb7988cddb5a979aca2ee8074a1069/examples/kubectl-ng/uv.lock#L988
sniffio==1.3.1
tensorboard==2.19.0
tensorboard>=2.19.0,<2.21.0
tensorboardX==2.6.2.2
# Transformers version constraint for container builds
# - vLLM 0.11.0: >=4.55.2, vLLM 0.11.2: >=4.56.0,<5
# - TensorRT-LLM 1.2.0rc5: ==4.56.0
# - SGLang 0.5.6: ==4.57.1
# - SGLang 0.5.6.post2: ==4.57.1
# Using >=4.56.0 and <=4.57.1 to satisfy all frameworks
transformers>=4.56.0,<=4.57.1
types-aiofiles==25.1.0.20251011
......
......@@ -62,7 +62,7 @@ The following table shows the dependency versions included with each Dynamo rele
| **Dependency** | **main (ToT)** | **v0.8.0 (unreleased)** | **v0.7.1** | **v0.7.0.post1** | **v0.7.0** |
| :------------- | :------------- | :---------------------- | :--------- | :--------------- | :--------- |
| SGLang | 0.5.6 | 0.5.6.post1 | 0.5.3.post4| 0.5.3.post4 | 0.5.3.post4|
| SGLang | 0.5.6.post2 | 0.5.6.post2 | 0.5.3.post4| 0.5.3.post4 | 0.5.3.post4|
| TensorRT-LLM | 1.2.0rc5 | 1.2.0rc6 | 1.2.0rc3 | 1.2.0rc3 | 1.2.0rc2 |
| vLLM | 0.12.0 | 0.12.0 | 0.11.0 | 0.11.0 | 0.11.0 |
| NIXL | 0.8.0 | 0.8.0 | 0.8.0 | 0.8.0 | 0.8.0 |
......
......@@ -61,8 +61,8 @@ vllm = [
sglang = [
"uvloop",
"sglang==0.5.6.post2",
"nixl[cu12]<=0.8.0",
"sglang==0.5.6",
]
[project.entry-points.pytest11]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment