disagg_router.sh 4.38 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Setup cleanup trap
cleanup() {
    echo "Cleaning up background processes..."
    kill $DYNAMO_PID $PREFILL_PID $PREFILL_ROUTER_PID 2>/dev/null || true
    wait $DYNAMO_PID $PREFILL_PID $PREFILL_ROUTER_PID 2>/dev/null || true
    echo "Cleanup complete."
}
trap cleanup EXIT INT TERM

14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# Parse command line arguments
ENABLE_OTEL=false
while [[ $# -gt 0 ]]; do
    case $1 in
        --enable-otel)
            ENABLE_OTEL=true
            shift
            ;;
        -h|--help)
            echo "Usage: $0 [OPTIONS]"
            echo "Options:"
            echo "  --enable-otel        Enable OpenTelemetry tracing"
            echo "  -h, --help           Show this help message"
            echo ""
            echo "Note: System metrics are enabled by default on ports:"
            echo "  8081 (router), 8082-8083 (prefill workers), 8084-8085 (decode workers)"
            exit 0
            ;;
        *)
            echo "Unknown option: $1"
            echo "Use --help for usage information"
            exit 1
            ;;
    esac
done

# Enable tracing if requested
41
TRACE_ARGS=()
42
43
44
45
if [ "$ENABLE_OTEL" = true ]; then
    export DYN_LOGGING_JSONL=true
    export OTEL_EXPORT_ENABLED=1
    export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=${OTEL_EXPORTER_OTLP_TRACES_ENDPOINT:-http://localhost:4317}
46
    TRACE_ARGS+=(--enable-trace --otlp-traces-endpoint localhost:4317)
47
48
fi

49
# run ingress
50
# dynamo.frontend accepts either --http-port flag or DYN_HTTP_PORT env var (defaults to 8000)
51
OTEL_SERVICE_NAME=dynamo-frontend \
52
53
54
55
56
57
58
python3 -m dynamo.frontend \
 --router-mode kv \
 --kv-overlap-score-weight 0 \
 --router-reset-states &
DYNAMO_PID=$!

# run prefill router
59
60
61
# Use numeric DYN_SYSTEM_PORT{N} env vars so launchers/test harnesses can set
# ports without encoding role names (prefill/decode) in the env var.
OTEL_SERVICE_NAME=dynamo-router-prefill DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT1:-8081} \
62
63
64
65
66
67
68
69
python3 -m dynamo.router \
  --endpoint dynamo.prefill.generate \
  --block-size 64 \
  --router-reset-states \
  --no-track-active-blocks &
PREFILL_ROUTER_PID=$!

# run prefill worker
70
OTEL_SERVICE_NAME=dynamo-worker-prefill-1 DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT2:-8082} \
71
72
73
74
75
76
77
78
79
python3 -m dynamo.sglang \
  --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
  --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
  --page-size 64 \
  --tp 1 \
  --trust-remote-code \
  --disaggregation-mode prefill \
  --host 0.0.0.0 \
  --kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5557"}' \
80
  --disaggregation-transfer-backend nixl \
81
82
  --enable-metrics \
  "${TRACE_ARGS[@]}" &
83
84
85
PREFILL_PID=$!

# run prefill worker
86
OTEL_SERVICE_NAME=dynamo-worker-prefill-2 DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT3:-8083} \
87
88
89
90
91
92
93
94
95
CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.sglang \
  --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
  --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
  --page-size 64 \
  --tp 1 \
  --trust-remote-code \
  --disaggregation-mode prefill \
  --host 0.0.0.0 \
  --kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5558"}' \
96
  --disaggregation-transfer-backend nixl \
97
98
  --enable-metrics \
  "${TRACE_ARGS[@]}" &
99
100
101
PREFILL_PID=$!

# run decode worker
102
OTEL_SERVICE_NAME=dynamo-worker-decode-1 DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT4:-8084} \
103
104
105
106
107
108
109
110
111
CUDA_VISIBLE_DEVICES=3 python3 -m dynamo.sglang \
  --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
  --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
  --page-size 64 \
  --tp 1 \
  --trust-remote-code \
  --disaggregation-mode decode \
  --host 0.0.0.0 \
  --kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5560"}' \
112
  --disaggregation-transfer-backend nixl \
113
114
  --enable-metrics \
  "${TRACE_ARGS[@]}" &
115
116
117
PREFILL_PID=$!

# run decode worker
118
OTEL_SERVICE_NAME=dynamo-worker-decode-2 DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT5:-8085} \
119
120
121
122
123
124
125
126
127
CUDA_VISIBLE_DEVICES=2 python3 -m dynamo.sglang \
  --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
  --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
  --page-size 64 \
  --tp 1 \
  --trust-remote-code \
  --disaggregation-mode decode \
  --host 0.0.0.0 \
  --kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5559"}' \
128
  --disaggregation-transfer-backend nixl \
129
130
  --enable-metrics \
  "${TRACE_ARGS[@]}"