Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
cfc6178a
Unverified
Commit
cfc6178a
authored
Jul 28, 2025
by
Biswa Panda
Committed by
GitHub
Jul 28, 2025
Browse files
feat: add sglang disagg deployment examples (#2137)
parent
f8096590
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
256 additions
and
5 deletions
+256
-5
components/backends/sglang/deploy/agg.yaml
components/backends/sglang/deploy/agg.yaml
+1
-1
components/backends/sglang/deploy/agg_router.yaml
components/backends/sglang/deploy/agg_router.yaml
+96
-0
components/backends/sglang/deploy/disagg.yaml
components/backends/sglang/deploy/disagg.yaml
+155
-0
components/backends/sglang/launch/agg.sh
components/backends/sglang/launch/agg.sh
+1
-1
components/backends/sglang/launch/disagg.sh
components/backends/sglang/launch/disagg.sh
+1
-1
components/backends/sglang/launch/disagg_dp_attn.sh
components/backends/sglang/launch/disagg_dp_attn.sh
+2
-2
No files found.
components/backends/sglang/deploy/agg.yaml
View file @
cfc6178a
...
@@ -42,7 +42,7 @@ spec:
...
@@ -42,7 +42,7 @@ spec:
workingDir
:
/workspace/components/backends/sglang
workingDir
:
/workspace/components/backends/sglang
command
:
[
"
sh"
,
"
-c"
]
command
:
[
"
sh"
,
"
-c"
]
args
:
args
:
-
"
python3
-m
dynamo.sglang.utils.clear_namespace
--namespace
dynamo
&&
python3
-m
dynamo.frontend"
-
"
python3
-m
dynamo.sglang.utils.clear_namespace
--namespace
sglang-agg
&&
python3
-m
dynamo.frontend
--http-port=8000
"
SGLangDecodeWorker
:
SGLangDecodeWorker
:
envFromSecret
:
hf-token-secret
envFromSecret
:
hf-token-secret
livenessProbe
:
livenessProbe
:
...
...
components/backends/sglang/deploy/agg_router.yaml
0 → 100644
View file @
cfc6178a
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
apiVersion
:
nvidia.com/v1alpha1
kind
:
DynamoGraphDeployment
metadata
:
name
:
sglang-agg-router
spec
:
services
:
Frontend
:
livenessProbe
:
httpGet
:
path
:
/health
port
:
8000
initialDelaySeconds
:
60
periodSeconds
:
60
timeoutSeconds
:
30
failureThreshold
:
10
readinessProbe
:
exec
:
command
:
-
/bin/sh
-
-c
-
"
exit
0"
initialDelaySeconds
:
60
periodSeconds
:
60
timeoutSeconds
:
30
failureThreshold
:
10
dynamoNamespace
:
sglang-agg-router
componentType
:
main
replicas
:
1
resources
:
requests
:
cpu
:
"
5"
memory
:
"
10Gi"
limits
:
cpu
:
"
5"
memory
:
"
10Gi"
extraPodSpec
:
mainContainer
:
image
:
my-registry/sglang-runtime:my-tag
workingDir
:
/workspace/components/backends/sglang
command
:
[
"
sh"
,
"
-c"
]
args
:
-
"
python3
-m
dynamo.sglang.utils.clear_namespace
--namespace
sglang-agg-router
&&
python3
-m
dynamo.frontend
--http-port=8000
--router-mode
kv"
SGLangDecodeWorker
:
envFromSecret
:
hf-token-secret
livenessProbe
:
exec
:
command
:
-
/bin/sh
-
-c
-
"
exit
0"
periodSeconds
:
60
timeoutSeconds
:
30
failureThreshold
:
10
readinessProbe
:
exec
:
command
:
-
/bin/sh
-
-c
-
"
exit
0"
initialDelaySeconds
:
60
periodSeconds
:
60
timeoutSeconds
:
30
failureThreshold
:
10
dynamoNamespace
:
sglang-agg-router
componentType
:
worker
replicas
:
1
resources
:
requests
:
cpu
:
"
10"
memory
:
"
20Gi"
gpu
:
"
1"
limits
:
cpu
:
"
10"
memory
:
"
20Gi"
gpu
:
"
1"
extraPodSpec
:
mainContainer
:
image
:
my-registry/sglang-runtime:my-tag
workingDir
:
/workspace/components/backends/sglang
args
:
-
"
python3"
-
"
-m"
-
"
dynamo.sglang.worker"
-
"
--model-path"
-
"
deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
-
"
--served-model-name"
-
"
deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
-
"
--page-size"
-
"
16"
-
"
--tp"
-
"
1"
-
"
--trust-remote-code"
-
"
--skip-tokenizer-init"
components/backends/sglang/deploy/disagg.yaml
0 → 100644
View file @
cfc6178a
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
apiVersion
:
nvidia.com/v1alpha1
kind
:
DynamoGraphDeployment
metadata
:
name
:
sglang-disagg
spec
:
services
:
Frontend
:
livenessProbe
:
httpGet
:
path
:
/health
port
:
8000
initialDelaySeconds
:
60
periodSeconds
:
60
timeoutSeconds
:
30
failureThreshold
:
10
readinessProbe
:
exec
:
command
:
-
/bin/sh
-
-c
-
"
exit
0"
initialDelaySeconds
:
60
periodSeconds
:
60
timeoutSeconds
:
30
failureThreshold
:
10
dynamoNamespace
:
sglang-disagg
componentType
:
main
replicas
:
1
resources
:
requests
:
cpu
:
"
5"
memory
:
"
10Gi"
limits
:
cpu
:
"
5"
memory
:
"
10Gi"
extraPodSpec
:
mainContainer
:
image
:
my-registry/sglang-runtime:my-tag
workingDir
:
/workspace/components/backends/sglang
command
:
[
"
sh"
,
"
-c"
]
args
:
-
"
python3
-m
dynamo.sglang.utils.clear_namespace
--namespace
sglang-disagg
&&
python3
-m
dynamo.frontend
--http-port=8000"
SGLangDecodeWorker
:
envFromSecret
:
hf-token-secret
livenessProbe
:
exec
:
command
:
-
/bin/sh
-
-c
-
"
exit
0"
periodSeconds
:
60
timeoutSeconds
:
30
failureThreshold
:
10
readinessProbe
:
exec
:
command
:
-
/bin/sh
-
-c
-
"
exit
0"
initialDelaySeconds
:
60
periodSeconds
:
60
timeoutSeconds
:
30
failureThreshold
:
10
dynamoNamespace
:
sglang-disagg
componentType
:
worker
replicas
:
1
resources
:
requests
:
cpu
:
"
10"
memory
:
"
20Gi"
gpu
:
"
1"
limits
:
cpu
:
"
10"
memory
:
"
20Gi"
gpu
:
"
1"
extraPodSpec
:
mainContainer
:
image
:
my-registry/sglang-runtime:my-tag
workingDir
:
/workspace/components/backends/sglang
args
:
-
"
python3"
-
"
-m"
-
"
dynamo.sglang.worker"
-
"
--model-path"
-
"
deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
-
"
--served-model-name"
-
"
deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
-
"
--page-size"
-
"
16"
-
"
--tp"
-
"
1"
-
"
--trust-remote-code"
-
"
--skip-tokenizer-init"
-
"
--disaggregation-mode"
-
"
decode"
-
"
--disaggregation-transfer-backend"
-
"
nixl"
SGLangPrefillWorker
:
envFromSecret
:
hf-token-secret
livenessProbe
:
exec
:
command
:
-
/bin/sh
-
-c
-
"
exit
0"
periodSeconds
:
60
timeoutSeconds
:
30
failureThreshold
:
10
readinessProbe
:
exec
:
command
:
-
/bin/sh
-
-c
-
"
exit
0"
initialDelaySeconds
:
60
periodSeconds
:
60
timeoutSeconds
:
30
failureThreshold
:
10
dynamoNamespace
:
sglang-disagg
componentType
:
worker
replicas
:
1
resources
:
requests
:
cpu
:
"
10"
memory
:
"
20Gi"
gpu
:
"
1"
limits
:
cpu
:
"
10"
memory
:
"
20Gi"
gpu
:
"
1"
extraPodSpec
:
mainContainer
:
image
:
my-registry/sglang-runtime:my-tag
workingDir
:
/workspace/components/backends/sglang
args
:
-
"
python3"
-
"
-m"
-
"
dynamo.sglang.worker"
-
"
--model-path"
-
"
deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
-
"
--served-model-name"
-
"
deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
-
"
--page-size"
-
"
16"
-
"
--tp"
-
"
1"
-
"
--trust-remote-code"
-
"
--skip-tokenizer-init"
-
"
--disaggregation-mode"
-
"
prefill"
-
"
--disaggregation-transfer-backend"
-
"
nixl"
\ No newline at end of file
components/backends/sglang/launch/agg.sh
View file @
cfc6178a
...
@@ -15,7 +15,7 @@ trap cleanup EXIT INT TERM
...
@@ -15,7 +15,7 @@ trap cleanup EXIT INT TERM
python3
-m
dynamo.sglang.utils.clear_namespace
--namespace
dynamo
python3
-m
dynamo.sglang.utils.clear_namespace
--namespace
dynamo
# run ingress
# run ingress
dynamo run
in
=
http
out
=
dyn
--http-port
=
8000 &
python3
-m
dynamo.frontend
--http-port
=
8000 &
DYNAMO_PID
=
$!
DYNAMO_PID
=
$!
# run worker
# run worker
...
...
components/backends/sglang/launch/disagg.sh
View file @
cfc6178a
...
@@ -15,7 +15,7 @@ trap cleanup EXIT INT TERM
...
@@ -15,7 +15,7 @@ trap cleanup EXIT INT TERM
python3
-m
dynamo.sglang.utils.clear_namespace
--namespace
dynamo
python3
-m
dynamo.sglang.utils.clear_namespace
--namespace
dynamo
# run ingress
# run ingress
dynamo run
in
=
http
out
=
dyn
--http-port
=
8000 &
python3
-m
dynamo.frontend
--http-port
=
8000 &
DYNAMO_PID
=
$!
DYNAMO_PID
=
$!
# run prefill worker
# run prefill worker
...
...
components/backends/sglang/launch/disagg_dp_attn.sh
View file @
cfc6178a
...
@@ -15,7 +15,7 @@ trap cleanup EXIT INT TERM
...
@@ -15,7 +15,7 @@ trap cleanup EXIT INT TERM
python3
-m
dynamo.sglang.utils.clear_namespace
--namespace
dynamo
python3
-m
dynamo.sglang.utils.clear_namespace
--namespace
dynamo
# run ingress
# run ingress
dynamo run
in
=
http
out
=
dyn
--http-port
=
8000 &
python3
-m
dynamo.frontend
--http-port
=
8000 &
DYNAMO_PID
=
$!
DYNAMO_PID
=
$!
# run prefill worker
# run prefill worker
...
@@ -33,7 +33,7 @@ python3 -m dynamo.sglang.worker \
...
@@ -33,7 +33,7 @@ python3 -m dynamo.sglang.worker \
PREFILL_PID
=
$!
PREFILL_PID
=
$!
# run decode worker
# run decode worker
CUDA_VISIBLE_DEVICES
=
2,3 python3 dynamo.sglang.decode_worker
\
CUDA_VISIBLE_DEVICES
=
2,3 python3
-m
dynamo.sglang.decode_worker
\
--model-path
silence09/DeepSeek-R1-Small-2layers
\
--model-path
silence09/DeepSeek-R1-Small-2layers
\
--served-model-name
silence09/DeepSeek-R1-Small-2layers
\
--served-model-name
silence09/DeepSeek-R1-Small-2layers
\
--tp
2
\
--tp
2
\
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment