Unverified Commit 8deb3ea4 authored by ishandhanani's avatar ishandhanani Committed by GitHub
Browse files

chore: bump sgl and update dp attn yaml (#1362)

parent ac53c0bb
......@@ -136,8 +136,9 @@ RUN if [ "$ARCH" = "arm64" ]; then \
# Install sglang
# Once either 0.4.6post6 or 0.4.7 is released, we can switch back to using the published version
# This commit references a fix for DP attention and NIXL https://github.com/sgl-project/sglang/pull/6473
ARG SGLANG_COMMIT="e806f708c954020bda7d1cc98035a44fd6a4eb96"
# This commit references multiple perf fixes for DP attention and NIXL https://github.com/sgl-project/sglang/pull/6780
# 6/2(ishan) - moving to tip of tree (ToT) for performance purposes
ARG SGLANG_COMMIT="6376b632eb4daef306b89ede0eabdcb89ddff728"
RUN --mount=type=cache,target=/root/.cache/uv \
git clone https://github.com/sgl-project/sglang.git && \
cd sglang && \
......
......@@ -14,7 +14,7 @@
# limitations under the License.
Frontend:
served_model_name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
served_model_name: silence09/DeepSeek-R1-Small-2layers
endpoint: dynamo.SGLangWorker.generate
port: 8000
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment