Unverified Commit 8deb3ea4 authored by ishandhanani's avatar ishandhanani Committed by GitHub
Browse files

chore: bump sgl and update dp attn yaml (#1362)

parent ac53c0bb
...@@ -136,8 +136,9 @@ RUN if [ "$ARCH" = "arm64" ]; then \ ...@@ -136,8 +136,9 @@ RUN if [ "$ARCH" = "arm64" ]; then \
# Install sglang # Install sglang
# Once either 0.4.6post6 or 0.4.7 is released, we can switch back to using the published version # Once either 0.4.6post6 or 0.4.7 is released, we can switch back to using the published version
# This commit references a fix for DP attention and NIXL https://github.com/sgl-project/sglang/pull/6473 # This commit references multiple perf fixes for DP attention and NIXL https://github.com/sgl-project/sglang/pull/6780
ARG SGLANG_COMMIT="e806f708c954020bda7d1cc98035a44fd6a4eb96" # 6/2(ishan) - moving to ToT for performance purposes
ARG SGLANG_COMMIT="6376b632eb4daef306b89ede0eabdcb89ddff728"
RUN --mount=type=cache,target=/root/.cache/uv \ RUN --mount=type=cache,target=/root/.cache/uv \
git clone https://github.com/sgl-project/sglang.git && \ git clone https://github.com/sgl-project/sglang.git && \
cd sglang && \ cd sglang && \
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
# limitations under the License. # limitations under the License.
Frontend: Frontend:
served_model_name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B served_model_name: silence09/DeepSeek-R1-Small-2layers
endpoint: dynamo.SGLangWorker.generate endpoint: dynamo.SGLangWorker.generate
port: 8000 port: 8000
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment