#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Shared launch utilities for example scripts.
#
# Goal:
#   Unify behavior and reduce duplication across vLLM, SGLang, and TensorRT-LLM
#   example launch scripts so they share one pattern for banners, process
#   management, and example curl output.
#
# Benefits:
#   - Single place to change banner format, example prompts, and wait/cleanup logic
#   - Consistent UX: same startup output and exit behavior across all backends
#   - Less per-script boilerplate (no manual PID tracking or custom cleanup traps)
#   - wait_any_exit propagates the first failing child's exit code and lets the
#     EXIT trap tear down the rest, so failures and Ctrl+C behave predictably
#
# Usage:
#   source "$(dirname "$(readlink -f "$0")")/../common/launch_utils.sh"
#   # or with SCRIPT_DIR already set:
#   source "$SCRIPT_DIR/../common/launch_utils.sh"
#
# Constants:
#   EXAMPLE_PROMPT         Default example prompt for curl commands (LLM / embedding)
#   EXAMPLE_PROMPT_VISUAL  Default example prompt for image / video generation
#
# Requires: bash 4.3+ (wait -n)
#
# Functions:
#   print_launch_banner    Print startup banner with model info and example curl
#   print_curl_footer      Print a custom curl example with standard framing (heredoc)
#   wait_any_exit          Wait for any background process to exit, propagate its code

# Refuse to load on bash older than 4.3; the message below names `wait -n`
# as the feature that needs it. This file is sourced, so `exit` terminates
# the launch script that sourced it.
if (( BASH_VERSINFO[0] < 4 || ( BASH_VERSINFO[0] == 4 && BASH_VERSINFO[1] < 3 ) )); then
    printf '%s\n' "launch_utils.sh requires bash 4.3+ (for wait -n), found ${BASH_VERSION}" >&2
    exit 1
fi

# Default example prompts exposed to the scripts that source this file.
EXAMPLE_PROMPT='Who is the tennis GOAT: Federer, Djokovic, or Nadal?'
EXAMPLE_PROMPT_VISUAL='A golden retriever riding a skateboard through a neon-lit city'

# wait_any_exit
#
# Waits for ANY backgrounded process to exit and propagates its exit code.
# Call this as the LAST line of every launch script, after backgrounding
# all processes (including the one that would otherwise run in the foreground).
#
# Why this is better than tracking PIDs manually or running in the foreground:
#   Foreground pattern:  if the frontend crashes, the script blocks on the
#   foreground worker and never notices until that worker also exits.
#   Manual PIDs:  requires bookkeeping ($DYNAMO_PID, $PREFILL_PID, ...),
#   a custom cleanup() function, and `wait $PID` only watches one process.
#   wait -n watches ALL children and returns as soon as ANY child dies, so
#   failures are detected immediately regardless of which process it was.
#
# Signal handling:
#   SIGTERM/SIGINT are trapped to exit 0 (clean shutdown).  Without this,
#   external cleanup (e.g. a test harness sending SIGTERM to the process
#   group) interrupts wait -n, which returns 143 (128+15).  Combined with
#   set -e, that non-zero code looks like a test failure.  Trapping TERM/INT
#   makes external teardown exit cleanly while still propagating real errors
#   (OOM, Python exceptions, etc.) from child processes.
#
# The EXIT trap (set at the top of each script) still fires when this function
# calls exit, tearing down the remaining processes via kill 0.
#
# Usage:
#   python -m dynamo.frontend &
#   python -m dynamo.vllm --model "$MODEL" &
#   wait_any_exit
wait_any_exit() {
    # Treat external teardown (SIGTERM / Ctrl+C) as a clean shutdown.
    trap 'exit 0' TERM INT

    # No background jobs at all means the caller forgot to background its
    # processes; fail loudly instead of waiting on nothing.
    if ! jobs -p | grep -q .; then
        echo "wait_any_exit: no background processes found (script bug: did you forget '&'?)" >&2
        exit 1
    fi

    # Capture the status through `||` rather than a bare `wait -n` followed by
    # `$?`: when the calling script runs under `set -e`, a bare non-zero
    # `wait -n` would abort the shell before the message below is printed.
    local _rc=0
    wait -n || _rc=$?

    echo "A background process exited with code $_rc"
    # Exit (not return) so the caller's EXIT trap, if any, runs with the
    # child's exit code as the script's status.
    exit "$_rc"
}

# print_launch_banner [flags] <title> <model> <port> [extra_info_lines...]
#
# Prints a startup banner with model/frontend info and an example curl command.
#
# Flags (must come before positional args):
#   --multimodal       Use a multimodal (image_url) curl example (max_tokens=50)
#   --max-tokens N     Override max_tokens in the curl example (default: 32)
#   --no-curl          Print only the banner, skip the example curl section
#
# Positional args:
#   title              Banner title, e.g. "Launching Aggregated Serving (1 GPU)"
#   model              Model name, e.g. "$MODEL"
#   port               HTTP port, e.g. "$HTTP_PORT"
#   extra_info_lines   Optional extra lines printed below "Frontend:" (one per arg)
#
# Examples:
#   # Standard text serving
#   print_launch_banner "Launching Aggregated Serving (1 GPU)" "$MODEL" "$HTTP_PORT"
#
#   # With extra info
#   print_launch_banner "Launching Disagg on Same GPU" "$MODEL" "$HTTP_PORT" \
#       "GPU Mem:     0.09 per worker (4 GiB each)"
#
#   # Multimodal
#   print_launch_banner --multimodal "Launching Multimodal" "$MODEL" "$HTTP_PORT"
#
#   # Banner only (script prints its own curl or conditionally skips)
#   print_launch_banner --no-curl "Launching DSR1 (Multi-Node)" "$MODEL" "$HTTP_PORT" \
#       "Nodes:       $NUM_NODES" \
#       "Node rank:   $NODE_RANK"
print_launch_banner() {
    local _curl_type="text"
    local _max_tokens=""    # empty = not overridden; default is picked after parsing
    local _no_curl=false
    local _line             # loop variable; kept local so it does not leak to callers

    # Consume leading --flags; the first non-flag (or unknown flag) ends parsing.
    while [[ "${1:-}" == --* ]]; do
        case "$1" in
            --multimodal)
                _curl_type="multimodal"
                shift
                ;;
            --max-tokens)
                if (( $# < 2 )); then
                    # `shift 2` would fail without shifting here and the loop
                    # would never terminate, so drop just the flag and warn.
                    echo "print_launch_banner: --max-tokens requires a value" >&2
                    shift
                else
                    _max_tokens="$2"
                    shift 2
                fi
                ;;
            --no-curl)
                _no_curl=true
                shift
                ;;
            *)
                break
                ;;
        esac
    done

    # Pick the default only when --max-tokens was not given, so the override
    # wins regardless of flag order (32 for text, 50 for multimodal).
    if [[ -z "$_max_tokens" ]]; then
        if [[ "$_curl_type" == "multimodal" ]]; then
            _max_tokens=50
        else
            _max_tokens=32
        fi
    fi

    if (( $# < 3 )); then
        echo "print_launch_banner: expected <title> <model> <port>, got $# positional argument(s)" >&2
    fi
    local _title="${1:-}"
    local _model="${2:-}"
    local _port="${3:-}"
    # Never shift past the end: a failed `shift 3` would leave the positionals
    # in "$@" and they would be re-printed below as extra info lines.
    if (( $# >= 3 )); then
        shift 3
    else
        shift "$#"
    fi

    echo "=========================================="
    printf '%s\n' "$_title"
    echo "=========================================="
    printf '%s\n' "Model:       $_model"
    printf '%s\n' "Frontend:    http://localhost:$_port"

    # Optional rows taken from the caller's environment; first non-empty of
    # MAX_MODEL_LEN, CONTEXT_LENGTH, MAX_SEQ_LEN is used for the sequence length.
    local _seq_len="${MAX_MODEL_LEN:-${CONTEXT_LENGTH:-${MAX_SEQ_LEN:-}}}"
    local _mem_args="${GPU_MEM_ARGS:-}"
    if [[ -n "$_seq_len" ]]; then
        printf '%s\n' "Max seq len: $_seq_len"
    fi
    if [[ -n "$_mem_args" ]]; then
        printf '%s\n' "GPU mem:     $_mem_args"
    fi

    # Remaining positional args are extra info lines, one per argument.
    for _line in "$@"; do
        printf '%s\n' "$_line"
    done
    echo "=========================================="

    if [[ "$_no_curl" == true ]]; then
        return 0
    fi

    echo ""
    echo "Example test command:"
    echo ""

    # Unquoted heredocs: ${...} is expanded and `\\` prints a single backslash
    # (the line-continuation shown in the example command).
    if [[ "$_curl_type" == "multimodal" ]]; then
        cat <<CURL_EOF
  curl http://localhost:${_port}/v1/chat/completions \\
    -H 'Content-Type: application/json' \\
    -d '{
      "model": "${_model}",
      "messages": [{"role": "user", "content": [
        {"type": "text", "text": "Describe this image"},
        {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/4/47/PNG_transparency_demonstration_1.png/300px-PNG_transparency_demonstration_1.png"}}
      ]}],
      "max_tokens": ${_max_tokens}
    }'
CURL_EOF
    else
        cat <<CURL_EOF
  curl http://localhost:${_port}/v1/chat/completions \\
    -H 'Content-Type: application/json' \\
    -d '{
      "model": "${_model}",
      "messages": [{"role": "user", "content": "Hello!"}],
      "max_tokens": ${_max_tokens}
    }'
CURL_EOF
    fi

    echo ""
    echo "=========================================="
}

# print_curl_footer
#
# Prints a custom curl example wrapped in the standard framing (matching
# print_launch_banner's built-in curl output). Reads the curl command from
# stdin so callers can use a heredoc -- no quoting issues with embedded
# double quotes, variable interpolation, etc.
#
# Pair with print_launch_banner --no-curl for non-standard endpoints
# (images, video, embeddings, etc.) that need a custom request body.
#
# Usage:
#   print_launch_banner --no-curl "Launching Image Diffusion" "$MODEL" "$PORT"
#   print_curl_footer <<CURL
#   curl http://localhost:${PORT}/v1/images/generations \\
#     -H 'Content-Type: application/json' \\
#     -d '{
#       "model": "${MODEL}",
#       "prompt": "A cat on a skateboard"
#     }'
#   CURL
print_curl_footer() {
    # Heading, surrounded by blank lines.
    printf '\n%s\n\n' "Example test command:"
    # Pass the caller-supplied command (stdin, typically a heredoc) through verbatim.
    cat
    # Blank line, then the closing rule.
    printf '\n%s\n' "=========================================="
}