Unverified Commit 9a002b30 authored by ishandhanani's avatar ishandhanani Committed by GitHub
Browse files

fix: add `--host` to k8s and bump sgl version (#3666)

parent ea07d51f
......@@ -56,6 +56,8 @@ spec:
- nixl
- --disaggregation-bootstrap-port
- "30001"
- --host
- "0.0.0.0"
- --mem-fraction-static
- "0.82"
prefill:
......@@ -93,3 +95,5 @@ spec:
- "30001"
- --mem-fraction-static
- "0.82"
- --host
- "0.0.0.0"
\ No newline at end of file
......@@ -46,7 +46,10 @@ spec:
- decode
- --disaggregation-transfer-backend
- nixl
- --disaggregation-bootstrap-port
- "12345"
- --host
- "0.0.0.0"
prefill:
envFromSecret: hf-token-secret
dynamoNamespace: sglang-disagg
......@@ -79,3 +82,7 @@ spec:
- prefill
- --disaggregation-transfer-backend
- nixl
- --disaggregation-bootstrap-port
- "12345"
- --host
- "0.0.0.0"
\ No newline at end of file
......@@ -70,6 +70,10 @@ spec:
- decode
- --disaggregation-transfer-backend
- nixl
- --disaggregation-bootstrap-port
- "12345"
- --host
- "0.0.0.0"
prefill:
dynamoNamespace: dynamo
envFromSecret: hf-token-secret
......@@ -102,3 +106,7 @@ spec:
- prefill
- --disaggregation-transfer-backend
- nixl
- --disaggregation-bootstrap-port
- "12345"
- --host
- "0.0.0.0"
......@@ -14,7 +14,7 @@ ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
# Make sure to update the dependency version in pyproject.toml when updating this
ARG SGLANG_VERSION="0.5.3.post1"
ARG SGLANG_VERSION="0.5.3.post2"
# Define general architecture ARGs for supporting both x86 and aarch64 builds.
......
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
ARG SGLANG_IMAGE_TAG="v0.5.3.post1"
ARG SGLANG_IMAGE_TAG="v0.5.3.post2"
ARG BRANCH_TYPE
FROM scratch AS local_src
......
......@@ -104,8 +104,8 @@ cd $DYNAMO_HOME
# installs sglang supported version along with dynamo
# include the prerelease flag to install flashinfer rc versions
uv pip install -e .
# install any sglang version >= 0.5.3
uv pip install "sglang[all]==0.5.3.post1"
# install the pinned sglang version (exactly 0.5.3.post2, matching pyproject.toml)
uv pip install "sglang[all]==0.5.3.post2"
```
</details>
......
......@@ -58,6 +58,7 @@ python3 -m dynamo.sglang \
--skip-tokenizer-init \
--disaggregation-mode prefill \
--disaggregation-transfer-backend nixl \
--host 0.0.0.0 \
--disaggregation-bootstrap-port 30001 \
--dist-init-addr ${HEAD_PREFILL_NODE_IP}:29500 \
--nnodes 4 \
......@@ -95,6 +96,7 @@ python3 -m dynamo.sglang \
--disaggregation-mode decode \
--disaggregation-transfer-backend nixl \
--disaggregation-bootstrap-port 30001 \
--host 0.0.0.0 \
--dist-init-addr ${HEAD_DECODE_NODE_IP}:29500 \
--nnodes 4 \
--node-rank 0 \
......
......@@ -39,6 +39,7 @@ python3 -m dynamo.sglang \
--disaggregation-mode prefill \
--disaggregation-transfer-backend nixl \
--disaggregation-bootstrap-port 30001 \
--host 0.0.0.0 \
--mem-fraction-static 0.82
```
......@@ -58,6 +59,7 @@ python3 -m dynamo.sglang \
--disaggregation-mode prefill \
--disaggregation-transfer-backend nixl \
--disaggregation-bootstrap-port 30001 \
--host 0.0.0.0 \
--mem-fraction-static 0.82
```
......@@ -77,6 +79,7 @@ python3 -m dynamo.sglang \
--disaggregation-mode decode \
--disaggregation-transfer-backend nixl \
--disaggregation-bootstrap-port 30001 \
--host 0.0.0.0 \
--mem-fraction-static 0.82
```
......@@ -96,6 +99,7 @@ python3 -m dynamo.sglang \
--disaggregation-mode decode \
--disaggregation-transfer-backend nixl \
--disaggregation-bootstrap-port 30001 \
--host 0.0.0.0 \
--mem-fraction-static 0.82
```
......
......@@ -60,7 +60,7 @@ vllm = [
sglang = [
"uvloop",
"nixl<=0.6.0",
"sglang[all]==0.5.3.post1",
"sglang[all]==0.5.3.post2",
]
[dependency-groups]
......
......@@ -67,6 +67,7 @@ spec:
--disaggregation-transfer-backend nixl
--disaggregation-bootstrap-port 30001
--mem-fraction-static 0.8
--host 0.0.0.0
prefill:
dynamoNamespace: sgl-dsr1-16gpu
componentType: worker
......@@ -108,3 +109,4 @@ spec:
--disaggregation-transfer-backend nixl
--disaggregation-bootstrap-port 30001
--mem-fraction-static 0.8
--host 0.0.0.0
\ No newline at end of file
......@@ -64,6 +64,7 @@ spec:
--disaggregation-mode decode
--disaggregation-transfer-backend nixl
--disaggregation-bootstrap-port 30001
--host 0.0.0.0
prefill:
dynamoNamespace: sgl-dsr1-8gpu
componentType: worker
......@@ -102,3 +103,4 @@ spec:
--disaggregation-mode prefill
--disaggregation-transfer-backend nixl
--disaggregation-bootstrap-port 30001
--host 0.0.0.0
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment