chore: bump sglang version (#1219)

811b10a6 · ishandhanani · GitHub · c12f61a6 · 811b10a6 · 811b10a6
Unverified Commit 811b10a6 authored May 27, 2025 by ishandhanani Committed by GitHub May 28, 2025
4 changed files
--- a/container/Dockerfile.sglang
+++ b/container/Dockerfile.sglang
@@ -136,16 +136,14 @@ RUN if [ "$ARCH" = "arm64" ]; then \
    fi

 # Install sglang
-# TODO: NIXL transfer is currently broken as of https://github.com/sgl-project/sglang/commit/7513558074adc4c4015b68e2ae7cf719d3401d5d
-# Once this is fixed we will have to install from that commit until a new post is released
-ARG SGLANG_COMMIT="4d643f6c7a291c86de64a9e52eca526b2d99775d"
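+# TODO: Once either 0.4.6post6 or 0.4.7 is released, switch back to the published version.
+# Until then, pin to a commit that includes the fix for DP attention and NIXL:
+# https://github.com/sgl-project/sglang/pull/6473
+# The checkout is left in place because the editable (-e) install below points at it.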
+ARG SGLANG_COMMIT="e806f708c954020bda7d1cc98035a44fd6a4eb96"
 RUN --mount=type=cache,target=/root/.cache/uv \
    git clone https://github.com/sgl-project/sglang.git && \
    cd sglang && \
    git checkout ${SGLANG_COMMIT} && \
-    uv pip install -e "python[all]" && \
-    cd .. && \
-    rm -rf sglang
+    uv pip install -e "python[all]"

 # Common dependencies
 RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \

--- a/examples/sglang/README.md
+++ b/examples/sglang/README.md
@@ -77,3 +77,13 @@ Because Dynamo has a discovery mechanism, we do not use a load balancer. Instead
 cd /workspace/examples/sglang
 dynamo serve graphs.disagg:Frontend -f ./configs/disagg.yaml
 ```
+
+##### Disaggregated with MoE and DP attention
+
+SGLang also supports DP attention for MoE models. We provide an example config for this in `configs/disagg-dp-attention.yaml`, which is based on the [DeepSeek-R1-Small-2layers](https://huggingface.co/silence09/DeepSeek-R1-Small-2layers) model. You can use this configuration to test disaggregated serving on a single node before scaling to the full DeepSeek-R1 model across multiple nodes.
+
+```bash
+# Note: this requires 4 GPUs (2 for the prefill worker and 2 for the decode worker)
+cd /workspace/examples/sglang
+dynamo serve graphs.disagg:Frontend -f ./configs/disagg-dp-attention.yaml
+```
--- a/examples/sglang/configs/disagg-dp-attention.yaml
+++ b/examples/sglang/configs/disagg-dp-attention.yaml
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+Frontend:
+  # Model name exposed by the frontend. Kept identical to the workers'
+  # served-model-name so requests name the model that is actually loaded.
+  served_model_name: silence09/DeepSeek-R1-Small-2layers
+  # NOTE(review): presumably <namespace>.<component>.<endpoint> of the prefill worker - confirm.
+  endpoint: dynamo.SGLangWorker.generate
+  port: 8000
+
+# Prefill-side worker (disaggregation-mode: prefill).
+SGLangWorker:
+  model-path: silence09/DeepSeek-R1-Small-2layers
+  served-model-name: silence09/DeepSeek-R1-Small-2layers
+  # tp and dp-size are both 2, with DP attention switched on.
+  tp: 2
+  dp-size: 2
+  enable-dp-attention: true
+  trust-remote-code: true
+  skip-tokenizer-init: true
+  disaggregation-mode: prefill
+  # NIXL transfer backend; the decode worker is configured with the same backend.
+  disaggregation-transfer-backend: nixl
+  # The decode worker uses a different port (31000).
+  port: 30000
+  ServiceArgs:
+    # One worker instance requesting 2 GPUs.
+    workers: 1
+    resources:
+      gpu: 2
+
+# Decode-side worker (disaggregation-mode: decode). Model, parallelism and
+# transfer-backend settings mirror the SGLangWorker section; only the
+# disaggregation mode and the port differ.
+SGLangDecodeWorker:
+  model-path: silence09/DeepSeek-R1-Small-2layers
+  served-model-name: silence09/DeepSeek-R1-Small-2layers
+  tp: 2
+  dp-size: 2
+  enable-dp-attention: true
+  trust-remote-code: true
+  skip-tokenizer-init: true
+  disaggregation-mode: decode
+  disaggregation-transfer-backend: nixl
+  # SGLang requires a port delta between prefill and decode workers when using enable-dp-attention
+  # (the prefill worker is on 30000).
+  port: 31000
+  ServiceArgs:
+    # One worker instance requesting 2 GPUs.
+    workers: 1
+    resources:
+      gpu: 2
\ No newline at end of file
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -67,7 +67,7 @@ vllm = [
 ]

 sglang = [
-    "sglang[all]@git+https://github.com/sgl-project/sglang@4d643f6c7a291c86de64a9e52eca526b2d99775d#subdirectory=python"
+    "sglang[all]@git+https://github.com/sgl-project/sglang@e806f708c954020bda7d1cc98035a44fd6a4eb96#subdirectory=python"
 ]

 [project.scripts]