Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
5fd96a23
Unverified
Commit
5fd96a23
authored
Nov 11, 2025
by
ishandhanani
Committed by
GitHub
Nov 12, 2025
Browse files
feat: add --enable-otel flag to SGLang launch scripts (#4243)
parent
fbad2860
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
193 additions
and
60 deletions
+193
-60
examples/backends/sglang/launch/agg.sh
examples/backends/sglang/launch/agg.sh
+34
-2
examples/backends/sglang/launch/agg_embed.sh
examples/backends/sglang/launch/agg_embed.sh
+35
-1
examples/backends/sglang/launch/agg_router.sh
examples/backends/sglang/launch/agg_router.sh
+39
-3
examples/backends/sglang/launch/disagg.sh
examples/backends/sglang/launch/disagg.sh
+38
-2
examples/backends/sglang/launch/disagg_dp_attn.sh
examples/backends/sglang/launch/disagg_dp_attn.sh
+0
-48
examples/backends/sglang/launch/disagg_router.sh
examples/backends/sglang/launch/disagg_router.sh
+47
-4
No files found.
examples/backends/sglang/launch/agg.sh
View file @
5fd96a23
...
...
@@ -11,13 +11,45 @@ cleanup() {
}
trap
cleanup EXIT INT TERM
# Parse command line arguments
ENABLE_OTEL=false

#######################################
# Print the command-line help for this launch script.
# Outputs: usage, option list and the metrics-port note on stdout.
#######################################
usage() {
  echo "Usage: $0 [OPTIONS]"
  echo "Options:"
  echo " --enable-otel Enable OpenTelemetry tracing"
  echo " -h, --help Show this help message"
  echo ""
  echo "Note: System metrics are enabled by default on port 8081 (worker)"
}

#######################################
# Walk the given arguments and record the selected options.
# Globals:   ENABLE_OTEL (written) - set to "true" by --enable-otel
# Arguments: the script's command-line arguments
# Outputs:   help on stdout for -h/--help; diagnostics on stderr
# Returns:   exits 0 after printing help, exits 1 on an unknown option
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --enable-otel)
        ENABLE_OTEL=true
        shift
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        # Diagnostics go to stderr so they never mix with regular output.
        echo "Unknown option: $1" >&2
        echo "Use --help for usage information" >&2
        exit 1
        ;;
    esac
  done
}

#######################################
# Enable tracing if requested.
# Globals:   ENABLE_OTEL (read); DYN_LOGGING_JSONL, OTEL_EXPORT_ENABLED,
#            OTEL_EXPORTER_OTLP_TRACES_ENDPOINT (exported when enabled)
#######################################
configure_tracing() {
  if [[ "$ENABLE_OTEL" == true ]]; then
    export DYN_LOGGING_JSONL=true
    export OTEL_EXPORT_ENABLED=1
    # Keep an endpoint the caller already set; otherwise use the default.
    export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT="${OTEL_EXPORTER_OTLP_TRACES_ENDPOINT:-http://localhost:4317}"
  fi
}

parse_args "$@"
configure_tracing
# run ingress
OTEL_SERVICE_NAME
=
dynamo-frontend
\
python3
-m
dynamo.frontend
--http-port
=
8000 &
DYNAMO_PID
=
$!
# run worker
with metrics enabled
DYN_SYSTEM_PORT
=
8081
\
# run worker
OTEL_SERVICE_NAME
=
dynamo-worker
DYN_SYSTEM_PORT
=
8081
\
python3
-m
dynamo.sglang
\
--model-path
Qwen/Qwen3-0.6B
\
--served-model-name
Qwen/Qwen3-0.6B
\
...
...
examples/backends/sglang/launch/agg_embed.sh
View file @
5fd96a23
...
...
@@ -11,12 +11,45 @@ cleanup() {
}
trap
cleanup EXIT INT TERM
# Parse command line arguments
ENABLE_OTEL=false

#######################################
# Print the command-line help for this launch script.
# Outputs: usage, option list and the metrics-port note on stdout.
#######################################
usage() {
  echo "Usage: $0 [OPTIONS]"
  echo "Options:"
  echo " --enable-otel Enable OpenTelemetry tracing"
  echo " -h, --help Show this help message"
  echo ""
  echo "Note: System metrics are enabled by default on port 8081 (worker)"
}

#######################################
# Walk the given arguments and record the selected options.
# Globals:   ENABLE_OTEL (written) - set to "true" by --enable-otel
# Arguments: the script's command-line arguments
# Outputs:   help on stdout for -h/--help; diagnostics on stderr
# Returns:   exits 0 after printing help, exits 1 on an unknown option
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --enable-otel)
        ENABLE_OTEL=true
        shift
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        # Diagnostics go to stderr so they never mix with regular output.
        echo "Unknown option: $1" >&2
        echo "Use --help for usage information" >&2
        exit 1
        ;;
    esac
  done
}

#######################################
# Enable tracing if requested.
# Globals:   ENABLE_OTEL (read); DYN_LOGGING_JSONL, OTEL_EXPORT_ENABLED,
#            OTEL_EXPORTER_OTLP_TRACES_ENDPOINT (exported when enabled)
#######################################
configure_tracing() {
  if [[ "$ENABLE_OTEL" == true ]]; then
    export DYN_LOGGING_JSONL=true
    export OTEL_EXPORT_ENABLED=1
    # Keep an endpoint the caller already set; otherwise use the default.
    export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT="${OTEL_EXPORTER_OTLP_TRACES_ENDPOINT:-http://localhost:4317}"
  fi
}

parse_args "$@"
configure_tracing
# run ingress
OTEL_SERVICE_NAME
=
dynamo-frontend
\
python3
-m
dynamo.frontend
--http-port
=
8000 &
DYNAMO_PID
=
$!
# run worker
OTEL_SERVICE_NAME
=
dynamo-worker-embedding
DYN_SYSTEM_PORT
=
8081
\
python3
-m
dynamo.sglang
\
--embedding-worker
\
--model-path
Qwen/Qwen3-Embedding-4B
\
...
...
@@ -24,4 +57,5 @@ python3 -m dynamo.sglang \
--page-size
16
\
--tp
1
\
--trust-remote-code
\
--use-sglang-tokenizer
--use-sglang-tokenizer
\
--enable-metrics
examples/backends/sglang/launch/agg_router.sh
View file @
5fd96a23
...
...
@@ -11,25 +11,61 @@ cleanup() {
}
trap
cleanup EXIT INT TERM
# Parse command line arguments
ENABLE_OTEL=false

#######################################
# Print the command-line help for this launch script.
# Outputs: usage, option list and the metrics-port note on stdout.
#######################################
usage() {
  echo "Usage: $0 [OPTIONS]"
  echo "Options:"
  echo " --enable-otel Enable OpenTelemetry tracing"
  echo " -h, --help Show this help message"
  echo ""
  echo "Note: System metrics are enabled by default on ports 8081 (worker-1), 8082 (worker-2)"
}

#######################################
# Walk the given arguments and record the selected options.
# Globals:   ENABLE_OTEL (written) - set to "true" by --enable-otel
# Arguments: the script's command-line arguments
# Outputs:   help on stdout for -h/--help; diagnostics on stderr
# Returns:   exits 0 after printing help, exits 1 on an unknown option
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --enable-otel)
        ENABLE_OTEL=true
        shift
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        # Diagnostics go to stderr so they never mix with regular output.
        echo "Unknown option: $1" >&2
        echo "Use --help for usage information" >&2
        exit 1
        ;;
    esac
  done
}

#######################################
# Enable tracing if requested.
# Globals:   ENABLE_OTEL (read); DYN_LOGGING_JSONL, OTEL_EXPORT_ENABLED,
#            OTEL_EXPORTER_OTLP_TRACES_ENDPOINT (exported when enabled)
#######################################
configure_tracing() {
  if [[ "$ENABLE_OTEL" == true ]]; then
    export DYN_LOGGING_JSONL=true
    export OTEL_EXPORT_ENABLED=1
    # Keep an endpoint the caller already set; otherwise use the default.
    export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT="${OTEL_EXPORTER_OTLP_TRACES_ENDPOINT:-http://localhost:4317}"
  fi
}

parse_args "$@"
configure_tracing
# run ingress
python
-m
dynamo.frontend
--router-mode
kv
--http-port
=
8000 &
OTEL_SERVICE_NAME
=
dynamo-frontend
\
python3
-m
dynamo.frontend
--router-mode
kv
--http-port
=
8000 &
DYNAMO_PID
=
$!
# run worker
OTEL_SERVICE_NAME
=
dynamo-worker-1
DYN_SYSTEM_PORT
=
8081
\
python3
-m
dynamo.sglang
\
--model-path
Qwen/Qwen3-0.6B
\
--served-model-name
Qwen/Qwen3-0.6B
\
--page-size
16
\
--tp
1
\
--trust-remote-code
\
--kv-events-config
'{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5557"}'
&
--kv-events-config
'{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5557"}'
\
--enable-metrics
&
WORKER_PID
=
$!
OTEL_SERVICE_NAME
=
dynamo-worker-2
DYN_SYSTEM_PORT
=
8082
\
CUDA_VISIBLE_DEVICES
=
1 python3
-m
dynamo.sglang
\
--model-path
Qwen/Qwen3-0.6B
\
--served-model-name
Qwen/Qwen3-0.6B
\
--page-size
16
\
--tp
1
\
--trust-remote-code
\
--kv-events-config
'{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5558"}'
--kv-events-config
'{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5558"}'
\
--enable-metrics
examples/backends/sglang/launch/disagg.sh
View file @
5fd96a23
...
...
@@ -11,12 +11,45 @@ cleanup() {
}
trap
cleanup EXIT INT TERM
# Parse command line arguments
ENABLE_OTEL=false

#######################################
# Print the command-line help for this launch script.
# Outputs: usage, option list and the metrics-port note on stdout.
#######################################
usage() {
  echo "Usage: $0 [OPTIONS]"
  echo "Options:"
  echo " --enable-otel Enable OpenTelemetry tracing"
  echo " -h, --help Show this help message"
  echo ""
  echo "Note: System metrics are enabled by default on ports 8081 (prefill), 8082 (decode)"
}

#######################################
# Walk the given arguments and record the selected options.
# Globals:   ENABLE_OTEL (written) - set to "true" by --enable-otel
# Arguments: the script's command-line arguments
# Outputs:   help on stdout for -h/--help; diagnostics on stderr
# Returns:   exits 0 after printing help, exits 1 on an unknown option
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --enable-otel)
        ENABLE_OTEL=true
        shift
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        # Diagnostics go to stderr so they never mix with regular output.
        echo "Unknown option: $1" >&2
        echo "Use --help for usage information" >&2
        exit 1
        ;;
    esac
  done
}

#######################################
# Enable tracing if requested.
# Globals:   ENABLE_OTEL (read); DYN_LOGGING_JSONL, OTEL_EXPORT_ENABLED,
#            OTEL_EXPORTER_OTLP_TRACES_ENDPOINT (exported when enabled)
#######################################
configure_tracing() {
  if [[ "$ENABLE_OTEL" == true ]]; then
    export DYN_LOGGING_JSONL=true
    export OTEL_EXPORT_ENABLED=1
    # Keep an endpoint the caller already set; otherwise use the default.
    export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT="${OTEL_EXPORTER_OTLP_TRACES_ENDPOINT:-http://localhost:4317}"
  fi
}

parse_args "$@"
configure_tracing
# run ingress
OTEL_SERVICE_NAME
=
dynamo-frontend
\
python3
-m
dynamo.frontend
--http-port
=
8000 &
DYNAMO_PID
=
$!
# run prefill worker
OTEL_SERVICE_NAME
=
dynamo-worker-prefill
DYN_SYSTEM_PORT
=
8081
\
python3
-m
dynamo.sglang
\
--model-path
Qwen/Qwen3-0.6B
\
--served-model-name
Qwen/Qwen3-0.6B
\
...
...
@@ -26,10 +59,12 @@ python3 -m dynamo.sglang \
--disaggregation-mode
prefill
\
--disaggregation-bootstrap-port
12345
\
--host
0.0.0.0
\
--disaggregation-transfer-backend
nixl &
--disaggregation-transfer-backend
nixl
\
--enable-metrics
&
PREFILL_PID
=
$!
# run decode worker
OTEL_SERVICE_NAME
=
dynamo-worker-decode
DYN_SYSTEM_PORT
=
8082
\
CUDA_VISIBLE_DEVICES
=
1 python3
-m
dynamo.sglang
\
--model-path
Qwen/Qwen3-0.6B
\
--served-model-name
Qwen/Qwen3-0.6B
\
...
...
@@ -39,4 +74,5 @@ CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.sglang \
--disaggregation-mode
decode
\
--disaggregation-bootstrap-port
12345
\
--host
0.0.0.0
\
--disaggregation-transfer-backend
nixl
--disaggregation-transfer-backend
nixl
\
--enable-metrics
examples/backends/sglang/launch/disagg_dp_attn.sh
deleted
100755 → 0
View file @
fbad2860
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# Setup cleanup trap
#######################################
# Stop and reap the background processes recorded by this script.
# Globals:   DYNAMO_PID, PREFILL_PID (read) - PIDs to terminate; either
#            may still be unset if the handler fires early.
# Outputs:   progress messages on stdout.
#######################################
cleanup() {
  # The handler is registered for EXIT as well as INT/TERM, so a signal
  # would otherwise run it a second time when the script finally exits.
  trap - EXIT INT TERM

  echo "Cleaning up background processes..."

  # Collect only the PIDs that were actually recorded. Calling `wait` with
  # no operands would block on every child of the shell instead.
  local pid
  local -a pids=()
  for pid in "${DYNAMO_PID:-}" "${PREFILL_PID:-}"; do
    if [[ -n "$pid" ]]; then
      pids+=("$pid")
    fi
  done

  if (( ${#pids[@]} > 0 )); then
    # Best effort: a process may already have exited on its own.
    kill "${pids[@]}" 2>/dev/null || true
    wait "${pids[@]}" 2>/dev/null || true
  fi

  echo "Cleanup complete."
}
trap cleanup EXIT INT TERM
# Model passed as both --model-path and --served-model-name to each worker.
model_name="silence09/DeepSeek-R1-Small-2layers"

# Flags the prefill and decode workers have in common, kept in launch order.
worker_args=(
  --model-path "$model_name"
  --served-model-name "$model_name"
  --tp 2
  --dp-size 2
  --page-size 16
  --enable-dp-attention
  --host 0.0.0.0
  --trust-remote-code
)

# run ingress (background; PID recorded for the cleanup trap)
python3 -m dynamo.frontend --http-port=8000 &
DYNAMO_PID=$!

# run prefill worker (background; PID recorded for the cleanup trap)
python3 -m dynamo.sglang "${worker_args[@]}" \
  --disaggregation-mode prefill \
  --disaggregation-transfer-backend nixl \
  --load-balance-method round_robin \
  --port 30000 &
PREFILL_PID=$!

# run decode worker (foreground; CUDA_VISIBLE_DEVICES applies to it only)
CUDA_VISIBLE_DEVICES=2,3 python3 -m dynamo.sglang "${worker_args[@]}" \
  --disaggregation-mode decode \
  --disaggregation-transfer-backend nixl \
  --prefill-round-robin-balance \
  --port 31000
examples/backends/sglang/launch/disagg_router.sh
View file @
5fd96a23
...
...
@@ -11,7 +11,41 @@ cleanup() {
}
trap
cleanup EXIT INT TERM
# Parse command line arguments
ENABLE_OTEL=false

#######################################
# Print the command-line help for this launch script.
# Outputs: usage, option list and the metrics-port note on stdout.
#######################################
usage() {
  echo "Usage: $0 [OPTIONS]"
  echo "Options:"
  echo " --enable-otel Enable OpenTelemetry tracing"
  echo " -h, --help Show this help message"
  echo ""
  echo "Note: System metrics are enabled by default on ports:"
  echo " 8081 (router), 8082-8083 (prefill workers), 8084-8085 (decode workers)"
}

#######################################
# Walk the given arguments and record the selected options.
# Globals:   ENABLE_OTEL (written) - set to "true" by --enable-otel
# Arguments: the script's command-line arguments
# Outputs:   help on stdout for -h/--help; diagnostics on stderr
# Returns:   exits 0 after printing help, exits 1 on an unknown option
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --enable-otel)
        ENABLE_OTEL=true
        shift
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        # Diagnostics go to stderr so they never mix with regular output.
        echo "Unknown option: $1" >&2
        echo "Use --help for usage information" >&2
        exit 1
        ;;
    esac
  done
}

#######################################
# Enable tracing if requested.
# Globals:   ENABLE_OTEL (read); DYN_LOGGING_JSONL, OTEL_EXPORT_ENABLED,
#            OTEL_EXPORTER_OTLP_TRACES_ENDPOINT (exported when enabled)
#######################################
configure_tracing() {
  if [[ "$ENABLE_OTEL" == true ]]; then
    export DYN_LOGGING_JSONL=true
    export OTEL_EXPORT_ENABLED=1
    # Keep an endpoint the caller already set; otherwise use the default.
    export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT="${OTEL_EXPORTER_OTLP_TRACES_ENDPOINT:-http://localhost:4317}"
  fi
}

parse_args "$@"
configure_tracing
# run ingress
OTEL_SERVICE_NAME
=
dynamo-frontend
\
python3
-m
dynamo.frontend
\
--http-port
=
8000
\
--router-mode
kv
\
...
...
@@ -20,6 +54,7 @@ python3 -m dynamo.frontend \
DYNAMO_PID
=
$!
# run prefill router
OTEL_SERVICE_NAME
=
dynamo-router-prefill
DYN_SYSTEM_PORT
=
8081
\
python3
-m
dynamo.router
\
--endpoint
dynamo.prefill.generate
\
--block-size
64
\
...
...
@@ -28,6 +63,7 @@ python3 -m dynamo.router \
PREFILL_ROUTER_PID
=
$!
# run prefill worker
OTEL_SERVICE_NAME
=
dynamo-worker-prefill-1
DYN_SYSTEM_PORT
=
8082
\
python3
-m
dynamo.sglang
\
--model-path
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
\
--served-model-name
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
\
...
...
@@ -37,10 +73,12 @@ python3 -m dynamo.sglang \
--disaggregation-mode
prefill
\
--host
0.0.0.0
\
--kv-events-config
'{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5557"}'
\
--disaggregation-transfer-backend
nixl &
--disaggregation-transfer-backend
nixl
\
--enable-metrics
&
PREFILL_PID
=
$!
# run prefill worker
OTEL_SERVICE_NAME
=
dynamo-worker-prefill-2
DYN_SYSTEM_PORT
=
8083
\
CUDA_VISIBLE_DEVICES
=
1 python3
-m
dynamo.sglang
\
--model-path
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
\
--served-model-name
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
\
...
...
@@ -50,10 +88,12 @@ CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.sglang \
--disaggregation-mode
prefill
\
--host
0.0.0.0
\
--kv-events-config
'{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5558"}'
\
--disaggregation-transfer-backend
nixl &
--disaggregation-transfer-backend
nixl
\
--enable-metrics
&
PREFILL_PID
=
$!
# run decode worker
OTEL_SERVICE_NAME
=
dynamo-worker-decode-1
DYN_SYSTEM_PORT
=
8084
\
CUDA_VISIBLE_DEVICES
=
3 python3
-m
dynamo.sglang
\
--model-path
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
\
--served-model-name
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
\
...
...
@@ -63,10 +103,12 @@ CUDA_VISIBLE_DEVICES=3 python3 -m dynamo.sglang \
--disaggregation-mode
decode
\
--host
0.0.0.0
\
--kv-events-config
'{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5560"}'
\
--disaggregation-transfer-backend
nixl &
--disaggregation-transfer-backend
nixl
\
--enable-metrics
&
PREFILL_PID
=
$!
# run decode worker
OTEL_SERVICE_NAME
=
dynamo-worker-decode-2
DYN_SYSTEM_PORT
=
8085
\
CUDA_VISIBLE_DEVICES
=
2 python3
-m
dynamo.sglang
\
--model-path
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
\
--served-model-name
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
\
...
...
@@ -76,4 +118,5 @@ CUDA_VISIBLE_DEVICES=2 python3 -m dynamo.sglang \
--disaggregation-mode
decode
\
--host
0.0.0.0
\
--kv-events-config
'{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5559"}'
\
--disaggregation-transfer-backend
nixl
--disaggregation-transfer-backend
nixl
\
--enable-metrics
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment