Unverified commit 9a002b30, authored by ishandhanani and committed by GitHub
Browse files

fix: add `--host` to k8s and bump sgl version (#3666)

parent ea07d51f
...@@ -56,6 +56,8 @@ spec: ...@@ -56,6 +56,8 @@ spec:
- nixl - nixl
- --disaggregation-bootstrap-port - --disaggregation-bootstrap-port
- "30001" - "30001"
- --host
- "0.0.0.0"
- --mem-fraction-static - --mem-fraction-static
- "0.82" - "0.82"
prefill: prefill:
...@@ -93,3 +95,5 @@ spec: ...@@ -93,3 +95,5 @@ spec:
- "30001" - "30001"
- --mem-fraction-static - --mem-fraction-static
- "0.82" - "0.82"
- --host
- "0.0.0.0"
\ No newline at end of file
...@@ -46,7 +46,10 @@ spec: ...@@ -46,7 +46,10 @@ spec:
- decode - decode
- --disaggregation-transfer-backend - --disaggregation-transfer-backend
- nixl - nixl
- --disaggregation-bootstrap-port
- "12345"
- --host
- "0.0.0.0"
prefill: prefill:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
dynamoNamespace: sglang-disagg dynamoNamespace: sglang-disagg
...@@ -79,3 +82,7 @@ spec: ...@@ -79,3 +82,7 @@ spec:
- prefill - prefill
- --disaggregation-transfer-backend - --disaggregation-transfer-backend
- nixl - nixl
- --disaggregation-bootstrap-port
- "12345"
- --host
- "0.0.0.0"
\ No newline at end of file
...@@ -70,6 +70,10 @@ spec: ...@@ -70,6 +70,10 @@ spec:
- decode - decode
- --disaggregation-transfer-backend - --disaggregation-transfer-backend
- nixl - nixl
- --disaggregation-bootstrap-port
- "12345"
- --host
- "0.0.0.0"
prefill: prefill:
dynamoNamespace: dynamo dynamoNamespace: dynamo
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
...@@ -102,3 +106,7 @@ spec: ...@@ -102,3 +106,7 @@ spec:
- prefill - prefill
- --disaggregation-transfer-backend - --disaggregation-transfer-backend
- nixl - nixl
- --disaggregation-bootstrap-port
- "12345"
- --host
- "0.0.0.0"
...@@ -14,7 +14,7 @@ ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda" ...@@ -14,7 +14,7 @@ ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04" ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
# Make sure to update the dependency version in pyproject.toml when updating this # Make sure to update the dependency version in pyproject.toml when updating this
ARG SGLANG_VERSION="0.5.3.post1" ARG SGLANG_VERSION="0.5.3.post2"
# Define general architecture ARGs for supporting both x86 and aarch64 builds. # Define general architecture ARGs for supporting both x86 and aarch64 builds.
......
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
ARG SGLANG_IMAGE_TAG="v0.5.3.post1" ARG SGLANG_IMAGE_TAG="v0.5.3.post2"
ARG BRANCH_TYPE ARG BRANCH_TYPE
FROM scratch AS local_src FROM scratch AS local_src
......
...@@ -104,8 +104,8 @@ cd $DYNAMO_HOME ...@@ -104,8 +104,8 @@ cd $DYNAMO_HOME
# installs sglang supported version along with dynamo # installs sglang supported version along with dynamo
# include the prerelease flag to install flashinfer rc versions # include the prerelease flag to install flashinfer rc versions
uv pip install -e . uv pip install -e .
# install any sglang version >= 0.5.3 # install any sglang version >= 0.5.3.post2
uv pip install "sglang[all]==0.5.3.post1" uv pip install "sglang[all]==0.5.3.post2"
``` ```
</details> </details>
......
...@@ -58,6 +58,7 @@ python3 -m dynamo.sglang \ ...@@ -58,6 +58,7 @@ python3 -m dynamo.sglang \
--skip-tokenizer-init \ --skip-tokenizer-init \
--disaggregation-mode prefill \ --disaggregation-mode prefill \
--disaggregation-transfer-backend nixl \ --disaggregation-transfer-backend nixl \
--host 0.0.0.0 \
--disaggregation-bootstrap-port 30001 \ --disaggregation-bootstrap-port 30001 \
--dist-init-addr ${HEAD_PREFILL_NODE_IP}:29500 \ --dist-init-addr ${HEAD_PREFILL_NODE_IP}:29500 \
--nnodes 4 \ --nnodes 4 \
...@@ -95,6 +96,7 @@ python3 -m dynamo.sglang \ ...@@ -95,6 +96,7 @@ python3 -m dynamo.sglang \
--disaggregation-mode decode \ --disaggregation-mode decode \
--disaggregation-transfer-backend nixl \ --disaggregation-transfer-backend nixl \
--disaggregation-bootstrap-port 30001 \ --disaggregation-bootstrap-port 30001 \
--host 0.0.0.0 \
--dist-init-addr ${HEAD_DECODE_NODE_IP}:29500 \ --dist-init-addr ${HEAD_DECODE_NODE_IP}:29500 \
--nnodes 4 \ --nnodes 4 \
--node-rank 0 \ --node-rank 0 \
......
...@@ -39,6 +39,7 @@ python3 -m dynamo.sglang \ ...@@ -39,6 +39,7 @@ python3 -m dynamo.sglang \
--disaggregation-mode prefill \ --disaggregation-mode prefill \
--disaggregation-transfer-backend nixl \ --disaggregation-transfer-backend nixl \
--disaggregation-bootstrap-port 30001 \ --disaggregation-bootstrap-port 30001 \
--host 0.0.0.0 \
--mem-fraction-static 0.82 --mem-fraction-static 0.82
``` ```
...@@ -58,6 +59,7 @@ python3 -m dynamo.sglang \ ...@@ -58,6 +59,7 @@ python3 -m dynamo.sglang \
--disaggregation-mode prefill \ --disaggregation-mode prefill \
--disaggregation-transfer-backend nixl \ --disaggregation-transfer-backend nixl \
--disaggregation-bootstrap-port 30001 \ --disaggregation-bootstrap-port 30001 \
--host 0.0.0.0 \
--mem-fraction-static 0.82 --mem-fraction-static 0.82
``` ```
...@@ -77,6 +79,7 @@ python3 -m dynamo.sglang \ ...@@ -77,6 +79,7 @@ python3 -m dynamo.sglang \
--disaggregation-mode decode \ --disaggregation-mode decode \
--disaggregation-transfer-backend nixl \ --disaggregation-transfer-backend nixl \
--disaggregation-bootstrap-port 30001 \ --disaggregation-bootstrap-port 30001 \
--host 0.0.0.0 \
--mem-fraction-static 0.82 --mem-fraction-static 0.82
``` ```
...@@ -96,6 +99,7 @@ python3 -m dynamo.sglang \ ...@@ -96,6 +99,7 @@ python3 -m dynamo.sglang \
--disaggregation-mode decode \ --disaggregation-mode decode \
--disaggregation-transfer-backend nixl \ --disaggregation-transfer-backend nixl \
--disaggregation-bootstrap-port 30001 \ --disaggregation-bootstrap-port 30001 \
--host 0.0.0.0 \
--mem-fraction-static 0.82 --mem-fraction-static 0.82
``` ```
......
...@@ -60,7 +60,7 @@ vllm = [ ...@@ -60,7 +60,7 @@ vllm = [
sglang = [ sglang = [
"uvloop", "uvloop",
"nixl<=0.6.0", "nixl<=0.6.0",
"sglang[all]==0.5.3.post1", "sglang[all]==0.5.3.post2",
] ]
[dependency-groups] [dependency-groups]
......
...@@ -67,6 +67,7 @@ spec: ...@@ -67,6 +67,7 @@ spec:
--disaggregation-transfer-backend nixl --disaggregation-transfer-backend nixl
--disaggregation-bootstrap-port 30001 --disaggregation-bootstrap-port 30001
--mem-fraction-static 0.8 --mem-fraction-static 0.8
--host 0.0.0.0
prefill: prefill:
dynamoNamespace: sgl-dsr1-16gpu dynamoNamespace: sgl-dsr1-16gpu
componentType: worker componentType: worker
...@@ -107,4 +108,5 @@ spec: ...@@ -107,4 +108,5 @@ spec:
--disaggregation-mode prefill --disaggregation-mode prefill
--disaggregation-transfer-backend nixl --disaggregation-transfer-backend nixl
--disaggregation-bootstrap-port 30001 --disaggregation-bootstrap-port 30001
--mem-fraction-static 0.8 --mem-fraction-static 0.8
\ No newline at end of file --host 0.0.0.0
\ No newline at end of file
...@@ -64,6 +64,7 @@ spec: ...@@ -64,6 +64,7 @@ spec:
--disaggregation-mode decode --disaggregation-mode decode
--disaggregation-transfer-backend nixl --disaggregation-transfer-backend nixl
--disaggregation-bootstrap-port 30001 --disaggregation-bootstrap-port 30001
--host 0.0.0.0
prefill: prefill:
dynamoNamespace: sgl-dsr1-8gpu dynamoNamespace: sgl-dsr1-8gpu
componentType: worker componentType: worker
...@@ -101,4 +102,5 @@ spec: ...@@ -101,4 +102,5 @@ spec:
--skip-tokenizer-init --skip-tokenizer-init
--disaggregation-mode prefill --disaggregation-mode prefill
--disaggregation-transfer-backend nixl --disaggregation-transfer-backend nixl
--disaggregation-bootstrap-port 30001 --disaggregation-bootstrap-port 30001
\ No newline at end of file --host 0.0.0.0
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment