disagg.sh 3.72 KB
Newer Older
1
#!/bin/bash
2
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
# SPDX-License-Identifier: Apache-2.0
4
5
6
#
# Disaggregated serving: prefill on GPU 0, decode on GPU 1.
# GPUs: 2
7
8
9
10
11
12
13
14
15
16

# Setup cleanup trap
cleanup() {
    echo "Cleaning up background processes..."
    kill $DYNAMO_PID $PREFILL_PID 2>/dev/null || true
    wait $DYNAMO_PID $PREFILL_PID 2>/dev/null || true
    echo "Cleanup complete."
}
trap cleanup EXIT INT TERM

17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# Parse command line arguments
ENABLE_OTEL=false
while [[ $# -gt 0 ]]; do
    case $1 in
        --enable-otel)
            ENABLE_OTEL=true
            shift
            ;;
        -h|--help)
            echo "Usage: $0 [OPTIONS]"
            echo "Options:"
            echo "  --enable-otel        Enable OpenTelemetry tracing"
            echo "  -h, --help           Show this help message"
            echo ""
            echo "Note: System metrics are enabled by default on ports 8081 (prefill), 8082 (decode)"
            exit 0
            ;;
        *)
            echo "Unknown option: $1"
            echo "Use --help for usage information"
            exit 1
            ;;
    esac
done

# Enable tracing if requested
43
TRACE_ARGS=()
44
45
46
47
if [ "$ENABLE_OTEL" = true ]; then
    export DYN_LOGGING_JSONL=true
    export OTEL_EXPORT_ENABLED=1
    export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=${OTEL_EXPORTER_OTLP_TRACES_ENDPOINT:-http://localhost:4317}
48
    TRACE_ARGS+=(--enable-trace --otlp-traces-endpoint localhost:4317)
49
fi
50

51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
MODEL="Qwen/Qwen3-0.6B"
HTTP_PORT="${DYN_HTTP_PORT:-8000}"
echo "=========================================="
echo "Launching Disaggregated Workers (P/D)"
echo "=========================================="
echo "Model:       $MODEL"
echo "Frontend:    http://localhost:$HTTP_PORT"
echo "=========================================="
echo ""
echo "Example test command:"
echo ""
echo "  curl http://localhost:${HTTP_PORT}/v1/chat/completions \\"
echo "    -H 'Content-Type: application/json' \\"
echo "    -d '{"
echo "      \"model\": \"${MODEL}\","
66
echo "      \"messages\": [{\"role\": \"user\", \"content\": \"Explain why Roger Federer is considered one of the greatest tennis players of all time\"}],"
67
68
69
70
71
echo "      \"max_tokens\": 32"
echo "    }'"
echo ""
echo "=========================================="

72
# run ingress
73
# dynamo.frontend accepts either --http-port flag or DYN_HTTP_PORT env var (defaults to 8000)
74
OTEL_SERVICE_NAME=dynamo-frontend \
75
python3 -m dynamo.frontend &
76
77
DYNAMO_PID=$!

78
79
#AssertionError: Prefill round robin balance is required when dp size > 1. Please make sure that the prefill instance is launched with `--load-balance-method round_robin` and `--prefill-round-robin-balance` is set for decode server.

80
# run prefill worker
81
82
83
# Use DYN_SYSTEM_PORT1/2 instead of *_PREFILL/*_DECODE env names so test
# harnesses can set one simple pair for disaggregated deployments.
OTEL_SERVICE_NAME=dynamo-worker-prefill DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT1:-8081} \
84
python3 -m dynamo.sglang \
85
86
  --model-path Qwen/Qwen3-0.6B \
  --served-model-name Qwen/Qwen3-0.6B \
87
  --page-size 16 \
88
  --tp 1 \
89
90
  --trust-remote-code \
  --disaggregation-mode prefill \
91
92
  --disaggregation-bootstrap-port 12345 \
  --host 0.0.0.0 \
93
  --port 40000 \
94
  --disaggregation-transfer-backend nixl \
95
96
  --enable-metrics \
  "${TRACE_ARGS[@]}" &
97
98
99
PREFILL_PID=$!

# run decode worker
100
OTEL_SERVICE_NAME=dynamo-worker-decode DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT2:-8082} \
101
102
103
CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.sglang \
  --model-path Qwen/Qwen3-0.6B \
  --served-model-name Qwen/Qwen3-0.6B \
104
  --page-size 16 \
105
  --tp 1 \
106
107
  --trust-remote-code \
  --disaggregation-mode decode \
108
109
  --disaggregation-bootstrap-port 12345 \
  --host 0.0.0.0 \
110
  --disaggregation-transfer-backend nixl \
111
112
  --enable-metrics \
  "${TRACE_ARGS[@]}"