Unverified Commit 5fd96a23 authored by ishandhanani's avatar ishandhanani Committed by GitHub
Browse files

feat: add --enable-otel flag to SGLang launch scripts (#4243)

parent fbad2860
...@@ -11,13 +11,45 @@ cleanup() { ...@@ -11,13 +11,45 @@ cleanup() {
} }
trap cleanup EXIT INT TERM trap cleanup EXIT INT TERM
# Command-line handling. --enable-otel switches tracing on, -h/--help prints
# the usage text and exits 0, and any other argument is rejected with exit 1.
ENABLE_OTEL=false
until (( $# == 0 )); do
  case "$1" in
    -h | --help)
      printf '%s\n' \
        "Usage: $0 [OPTIONS]" \
        "Options:" \
        " --enable-otel Enable OpenTelemetry tracing" \
        " -h, --help Show this help message" \
        "" \
        "Note: System metrics are enabled by default on port 8081 (worker)"
      exit 0
      ;;
    --enable-otel)
      ENABLE_OTEL=true
      shift
      ;;
    *)
      printf '%s\n' \
        "Unknown option: $1" \
        "Use --help for usage information"
      exit 1
      ;;
  esac
done

# When tracing was requested, export the logging/OTEL switches. The OTLP traces
# endpoint keeps any non-empty value already present in the environment and
# otherwise falls back to http://localhost:4317.
if [[ $ENABLE_OTEL == true ]]; then
  export DYN_LOGGING_JSONL=true OTEL_EXPORT_ENABLED=1
  export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT="${OTEL_EXPORTER_OTLP_TRACES_ENDPOINT:-http://localhost:4317}"
fi
# run ingress # run ingress
OTEL_SERVICE_NAME=dynamo-frontend \
python3 -m dynamo.frontend --http-port=8000 & python3 -m dynamo.frontend --http-port=8000 &
DYNAMO_PID=$! DYNAMO_PID=$!
# run worker with metrics enabled # run worker
DYN_SYSTEM_PORT=8081 \ OTEL_SERVICE_NAME=dynamo-worker DYN_SYSTEM_PORT=8081 \
python3 -m dynamo.sglang \ python3 -m dynamo.sglang \
--model-path Qwen/Qwen3-0.6B \ --model-path Qwen/Qwen3-0.6B \
--served-model-name Qwen/Qwen3-0.6B \ --served-model-name Qwen/Qwen3-0.6B \
......
...@@ -11,12 +11,45 @@ cleanup() { ...@@ -11,12 +11,45 @@ cleanup() {
} }
trap cleanup EXIT INT TERM trap cleanup EXIT INT TERM
# Command-line handling. --enable-otel switches tracing on, -h/--help prints
# the usage text and exits 0, and any other argument is rejected with exit 1.
ENABLE_OTEL=false
until (( $# == 0 )); do
  case "$1" in
    -h | --help)
      printf '%s\n' \
        "Usage: $0 [OPTIONS]" \
        "Options:" \
        " --enable-otel Enable OpenTelemetry tracing" \
        " -h, --help Show this help message" \
        "" \
        "Note: System metrics are enabled by default on port 8081 (worker)"
      exit 0
      ;;
    --enable-otel)
      ENABLE_OTEL=true
      shift
      ;;
    *)
      printf '%s\n' \
        "Unknown option: $1" \
        "Use --help for usage information"
      exit 1
      ;;
  esac
done

# When tracing was requested, export the logging/OTEL switches. The OTLP traces
# endpoint keeps any non-empty value already present in the environment and
# otherwise falls back to http://localhost:4317.
if [[ $ENABLE_OTEL == true ]]; then
  export DYN_LOGGING_JSONL=true OTEL_EXPORT_ENABLED=1
  export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT="${OTEL_EXPORTER_OTLP_TRACES_ENDPOINT:-http://localhost:4317}"
fi
# run ingress # run ingress
OTEL_SERVICE_NAME=dynamo-frontend \
python3 -m dynamo.frontend --http-port=8000 & python3 -m dynamo.frontend --http-port=8000 &
DYNAMO_PID=$! DYNAMO_PID=$!
# run worker # run worker
OTEL_SERVICE_NAME=dynamo-worker-embedding DYN_SYSTEM_PORT=8081 \
python3 -m dynamo.sglang \ python3 -m dynamo.sglang \
--embedding-worker \ --embedding-worker \
--model-path Qwen/Qwen3-Embedding-4B \ --model-path Qwen/Qwen3-Embedding-4B \
...@@ -24,4 +57,5 @@ python3 -m dynamo.sglang \ ...@@ -24,4 +57,5 @@ python3 -m dynamo.sglang \
--page-size 16 \ --page-size 16 \
--tp 1 \ --tp 1 \
--trust-remote-code \ --trust-remote-code \
--use-sglang-tokenizer --use-sglang-tokenizer \
--enable-metrics
...@@ -11,25 +11,61 @@ cleanup() { ...@@ -11,25 +11,61 @@ cleanup() {
} }
trap cleanup EXIT INT TERM trap cleanup EXIT INT TERM
# Command-line handling. --enable-otel switches tracing on, -h/--help prints
# the usage text and exits 0, and any other argument is rejected with exit 1.
ENABLE_OTEL=false
until (( $# == 0 )); do
  case "$1" in
    -h | --help)
      printf '%s\n' \
        "Usage: $0 [OPTIONS]" \
        "Options:" \
        " --enable-otel Enable OpenTelemetry tracing" \
        " -h, --help Show this help message" \
        "" \
        "Note: System metrics are enabled by default on ports 8081 (worker-1), 8082 (worker-2)"
      exit 0
      ;;
    --enable-otel)
      ENABLE_OTEL=true
      shift
      ;;
    *)
      printf '%s\n' \
        "Unknown option: $1" \
        "Use --help for usage information"
      exit 1
      ;;
  esac
done

# When tracing was requested, export the logging/OTEL switches. The OTLP traces
# endpoint keeps any non-empty value already present in the environment and
# otherwise falls back to http://localhost:4317.
if [[ $ENABLE_OTEL == true ]]; then
  export DYN_LOGGING_JSONL=true OTEL_EXPORT_ENABLED=1
  export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT="${OTEL_EXPORTER_OTLP_TRACES_ENDPOINT:-http://localhost:4317}"
fi
# run ingress # run ingress
python -m dynamo.frontend --router-mode kv --http-port=8000 & OTEL_SERVICE_NAME=dynamo-frontend \
python3 -m dynamo.frontend --router-mode kv --http-port=8000 &
DYNAMO_PID=$! DYNAMO_PID=$!
# run worker # run worker
OTEL_SERVICE_NAME=dynamo-worker-1 DYN_SYSTEM_PORT=8081 \
python3 -m dynamo.sglang \ python3 -m dynamo.sglang \
--model-path Qwen/Qwen3-0.6B \ --model-path Qwen/Qwen3-0.6B \
--served-model-name Qwen/Qwen3-0.6B \ --served-model-name Qwen/Qwen3-0.6B \
--page-size 16 \ --page-size 16 \
--tp 1 \ --tp 1 \
--trust-remote-code \ --trust-remote-code \
--kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5557"}' & --kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5557"}' \
--enable-metrics &
WORKER_PID=$! WORKER_PID=$!
OTEL_SERVICE_NAME=dynamo-worker-2 DYN_SYSTEM_PORT=8082 \
CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.sglang \ CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.sglang \
--model-path Qwen/Qwen3-0.6B \ --model-path Qwen/Qwen3-0.6B \
--served-model-name Qwen/Qwen3-0.6B \ --served-model-name Qwen/Qwen3-0.6B \
--page-size 16 \ --page-size 16 \
--tp 1 \ --tp 1 \
--trust-remote-code \ --trust-remote-code \
--kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5558"}' --kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5558"}' \
--enable-metrics
...@@ -11,12 +11,45 @@ cleanup() { ...@@ -11,12 +11,45 @@ cleanup() {
} }
trap cleanup EXIT INT TERM trap cleanup EXIT INT TERM
# Command-line handling. --enable-otel switches tracing on, -h/--help prints
# the usage text and exits 0, and any other argument is rejected with exit 1.
ENABLE_OTEL=false
until (( $# == 0 )); do
  case "$1" in
    -h | --help)
      printf '%s\n' \
        "Usage: $0 [OPTIONS]" \
        "Options:" \
        " --enable-otel Enable OpenTelemetry tracing" \
        " -h, --help Show this help message" \
        "" \
        "Note: System metrics are enabled by default on ports 8081 (prefill), 8082 (decode)"
      exit 0
      ;;
    --enable-otel)
      ENABLE_OTEL=true
      shift
      ;;
    *)
      printf '%s\n' \
        "Unknown option: $1" \
        "Use --help for usage information"
      exit 1
      ;;
  esac
done

# When tracing was requested, export the logging/OTEL switches. The OTLP traces
# endpoint keeps any non-empty value already present in the environment and
# otherwise falls back to http://localhost:4317.
if [[ $ENABLE_OTEL == true ]]; then
  export DYN_LOGGING_JSONL=true OTEL_EXPORT_ENABLED=1
  export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT="${OTEL_EXPORTER_OTLP_TRACES_ENDPOINT:-http://localhost:4317}"
fi
# run ingress # run ingress
OTEL_SERVICE_NAME=dynamo-frontend \
python3 -m dynamo.frontend --http-port=8000 & python3 -m dynamo.frontend --http-port=8000 &
DYNAMO_PID=$! DYNAMO_PID=$!
# run prefill worker # run prefill worker
OTEL_SERVICE_NAME=dynamo-worker-prefill DYN_SYSTEM_PORT=8081 \
python3 -m dynamo.sglang \ python3 -m dynamo.sglang \
--model-path Qwen/Qwen3-0.6B \ --model-path Qwen/Qwen3-0.6B \
--served-model-name Qwen/Qwen3-0.6B \ --served-model-name Qwen/Qwen3-0.6B \
...@@ -26,10 +59,12 @@ python3 -m dynamo.sglang \ ...@@ -26,10 +59,12 @@ python3 -m dynamo.sglang \
--disaggregation-mode prefill \ --disaggregation-mode prefill \
--disaggregation-bootstrap-port 12345 \ --disaggregation-bootstrap-port 12345 \
--host 0.0.0.0 \ --host 0.0.0.0 \
--disaggregation-transfer-backend nixl & --disaggregation-transfer-backend nixl \
--enable-metrics &
PREFILL_PID=$! PREFILL_PID=$!
# run decode worker # run decode worker
OTEL_SERVICE_NAME=dynamo-worker-decode DYN_SYSTEM_PORT=8082 \
CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.sglang \ CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.sglang \
--model-path Qwen/Qwen3-0.6B \ --model-path Qwen/Qwen3-0.6B \
--served-model-name Qwen/Qwen3-0.6B \ --served-model-name Qwen/Qwen3-0.6B \
...@@ -39,4 +74,5 @@ CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.sglang \ ...@@ -39,4 +74,5 @@ CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.sglang \
--disaggregation-mode decode \ --disaggregation-mode decode \
--disaggregation-bootstrap-port 12345 \ --disaggregation-bootstrap-port 12345 \
--host 0.0.0.0 \ --host 0.0.0.0 \
--disaggregation-transfer-backend nixl --disaggregation-transfer-backend nixl \
--enable-metrics
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# Setup cleanup trap
#######################################
# Stop the background jobs started by this script and reap them.
# Globals:
#   DYNAMO_PID  (read) - PID of the background frontend, may be unset/empty
#   PREFILL_PID (read) - PID of the background prefill worker, may be unset/empty
# Outputs:
#   Progress messages on stdout.
# Returns:
#   Always 0; failures from kill/wait are ignored on purpose because a job
#   may already have exited by the time cleanup runs.
#######################################
cleanup() {
  echo "Cleaning up background processes..."

  # Collect only the PIDs that were actually recorded. If the trap fires
  # before a job was launched its variable is empty, and passing nothing to
  # `wait` would make it block on every child of this shell.
  local -a pids=()
  local pid
  for pid in "${DYNAMO_PID:-}" "${PREFILL_PID:-}"; do
    if [[ -n "$pid" ]]; then
      pids+=("$pid")
    fi
  done

  if (( ${#pids[@]} > 0 )); then
    kill "${pids[@]}" 2>/dev/null || true
    wait "${pids[@]}" 2>/dev/null || true
  fi

  echo "Cleanup complete."
}
# Run cleanup when the script exits or receives SIGINT/SIGTERM.
trap cleanup EXIT INT TERM

# Model used by both workers, and the leading flags the two worker
# invocations have in common (kept in their original order).
model_name="silence09/DeepSeek-R1-Small-2layers"
shared_worker_args=(
  --model-path "$model_name"
  --served-model-name "$model_name"
  --tp 2
  --dp-size 2
  --page-size 16
  --enable-dp-attention
  --host 0.0.0.0
  --trust-remote-code
)

# Ingress: frontend with HTTP on port 8000, started in the background.
python3 -m dynamo.frontend --http-port=8000 &
DYNAMO_PID=$!

# Prefill worker, started in the background.
python3 -m dynamo.sglang "${shared_worker_args[@]}" \
  --disaggregation-mode prefill \
  --disaggregation-transfer-backend nixl \
  --load-balance-method round_robin \
  --port 30000 &
PREFILL_PID=$!

# Decode worker, launched with CUDA_VISIBLE_DEVICES=2,3. It runs in the
# foreground, so the script stays on this command until it returns.
CUDA_VISIBLE_DEVICES=2,3 python3 -m dynamo.sglang "${shared_worker_args[@]}" \
  --disaggregation-mode decode \
  --disaggregation-transfer-backend nixl \
  --prefill-round-robin-balance \
  --port 31000
...@@ -11,7 +11,41 @@ cleanup() { ...@@ -11,7 +11,41 @@ cleanup() {
} }
trap cleanup EXIT INT TERM trap cleanup EXIT INT TERM
# Command-line handling. --enable-otel switches tracing on, -h/--help prints
# the usage text and exits 0, and any other argument is rejected with exit 1.
ENABLE_OTEL=false
until (( $# == 0 )); do
  case "$1" in
    -h | --help)
      printf '%s\n' \
        "Usage: $0 [OPTIONS]" \
        "Options:" \
        " --enable-otel Enable OpenTelemetry tracing" \
        " -h, --help Show this help message" \
        "" \
        "Note: System metrics are enabled by default on ports:" \
        " 8081 (router), 8082-8083 (prefill workers), 8084-8085 (decode workers)"
      exit 0
      ;;
    --enable-otel)
      ENABLE_OTEL=true
      shift
      ;;
    *)
      printf '%s\n' \
        "Unknown option: $1" \
        "Use --help for usage information"
      exit 1
      ;;
  esac
done

# When tracing was requested, export the logging/OTEL switches. The OTLP traces
# endpoint keeps any non-empty value already present in the environment and
# otherwise falls back to http://localhost:4317.
if [[ $ENABLE_OTEL == true ]]; then
  export DYN_LOGGING_JSONL=true OTEL_EXPORT_ENABLED=1
  export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT="${OTEL_EXPORTER_OTLP_TRACES_ENDPOINT:-http://localhost:4317}"
fi
# run ingress # run ingress
OTEL_SERVICE_NAME=dynamo-frontend \
python3 -m dynamo.frontend \ python3 -m dynamo.frontend \
--http-port=8000 \ --http-port=8000 \
--router-mode kv \ --router-mode kv \
...@@ -20,6 +54,7 @@ python3 -m dynamo.frontend \ ...@@ -20,6 +54,7 @@ python3 -m dynamo.frontend \
DYNAMO_PID=$! DYNAMO_PID=$!
# run prefill router # run prefill router
OTEL_SERVICE_NAME=dynamo-router-prefill DYN_SYSTEM_PORT=8081 \
python3 -m dynamo.router \ python3 -m dynamo.router \
--endpoint dynamo.prefill.generate \ --endpoint dynamo.prefill.generate \
--block-size 64 \ --block-size 64 \
...@@ -28,6 +63,7 @@ python3 -m dynamo.router \ ...@@ -28,6 +63,7 @@ python3 -m dynamo.router \
PREFILL_ROUTER_PID=$! PREFILL_ROUTER_PID=$!
# run prefill worker # run prefill worker
OTEL_SERVICE_NAME=dynamo-worker-prefill-1 DYN_SYSTEM_PORT=8082 \
python3 -m dynamo.sglang \ python3 -m dynamo.sglang \
--model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B \ --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
--served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B \ --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
...@@ -37,10 +73,12 @@ python3 -m dynamo.sglang \ ...@@ -37,10 +73,12 @@ python3 -m dynamo.sglang \
--disaggregation-mode prefill \ --disaggregation-mode prefill \
--host 0.0.0.0 \ --host 0.0.0.0 \
--kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5557"}' \ --kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5557"}' \
--disaggregation-transfer-backend nixl & --disaggregation-transfer-backend nixl \
--enable-metrics &
PREFILL_PID=$! PREFILL_PID=$!
# run prefill worker # run prefill worker
OTEL_SERVICE_NAME=dynamo-worker-prefill-2 DYN_SYSTEM_PORT=8083 \
CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.sglang \ CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.sglang \
--model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B \ --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
--served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B \ --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
...@@ -50,10 +88,12 @@ CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.sglang \ ...@@ -50,10 +88,12 @@ CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.sglang \
--disaggregation-mode prefill \ --disaggregation-mode prefill \
--host 0.0.0.0 \ --host 0.0.0.0 \
--kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5558"}' \ --kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5558"}' \
--disaggregation-transfer-backend nixl & --disaggregation-transfer-backend nixl \
--enable-metrics &
PREFILL_PID=$! PREFILL_PID=$!
# run decode worker # run decode worker
OTEL_SERVICE_NAME=dynamo-worker-decode-1 DYN_SYSTEM_PORT=8084 \
CUDA_VISIBLE_DEVICES=3 python3 -m dynamo.sglang \ CUDA_VISIBLE_DEVICES=3 python3 -m dynamo.sglang \
--model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B \ --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
--served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B \ --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
...@@ -63,10 +103,12 @@ CUDA_VISIBLE_DEVICES=3 python3 -m dynamo.sglang \ ...@@ -63,10 +103,12 @@ CUDA_VISIBLE_DEVICES=3 python3 -m dynamo.sglang \
--disaggregation-mode decode \ --disaggregation-mode decode \
--host 0.0.0.0 \ --host 0.0.0.0 \
--kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5560"}' \ --kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5560"}' \
--disaggregation-transfer-backend nixl & --disaggregation-transfer-backend nixl \
--enable-metrics &
PREFILL_PID=$! PREFILL_PID=$!
# run decode worker # run decode worker
OTEL_SERVICE_NAME=dynamo-worker-decode-2 DYN_SYSTEM_PORT=8085 \
CUDA_VISIBLE_DEVICES=2 python3 -m dynamo.sglang \ CUDA_VISIBLE_DEVICES=2 python3 -m dynamo.sglang \
--model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B \ --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
--served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B \ --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
...@@ -76,4 +118,5 @@ CUDA_VISIBLE_DEVICES=2 python3 -m dynamo.sglang \ ...@@ -76,4 +118,5 @@ CUDA_VISIBLE_DEVICES=2 python3 -m dynamo.sglang \
--disaggregation-mode decode \ --disaggregation-mode decode \
--host 0.0.0.0 \ --host 0.0.0.0 \
--kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5559"}' \ --kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5559"}' \
--disaggregation-transfer-backend nixl --disaggregation-transfer-backend nixl \
--enable-metrics
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment