#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

set -e

# Setup cleanup trap
#
# cleanup: stop the background frontend process, if one was started.
# Globals:  FRONTEND_PID (read, then cleared) - PID of the backgrounded
#           frontend; unset or empty when the script exits before launching
#           it (e.g. --help or an argument error).
# Outputs:  progress messages on stdout, only when a process is being stopped.
# Returns:  0
cleanup() {
    # Nothing was launched, or an earlier invocation already handled it.
    # The trap below is registered for EXIT as well as INT/TERM, so this
    # function can run more than once during a single shutdown.
    if [[ -z "${FRONTEND_PID:-}" ]]; then
        return 0
    fi

    echo "Cleaning up background processes..."
    # Best effort: the process may already be gone, so failures are ignored.
    kill "$FRONTEND_PID" 2>/dev/null || true
    wait "$FRONTEND_PID" 2>/dev/null || true
    # Clear the PID so a repeated invocation is a no-op.
    FRONTEND_PID=""
    echo "Cleanup complete."
}
trap cleanup EXIT INT TERM

# Defaults
# Each value can be overridden by the matching command line option.
WAN_SIZE="1b"                          # model size selector: 1b or 14b
FS_URL="file:///tmp/dynamo_media"      # passed to the worker as --media-output-fs-url
HTTP_PORT="${HTTP_PORT:-8000}"         # honours HTTP_PORT from the environment
# The values below are used in the example request printed at startup.
NUM_FRAMES=17
HEIGHT=480
WIDTH=832
NUM_INFERENCE_STEPS=50

# Parse command line arguments
# Recognised options overwrite the corresponding configuration variable.
# Anything unrecognised is collected, in order, in EXTRA_ARGS so it can be
# forwarded to the worker command.
EXTRA_ARGS=()
while [[ $# -gt 0 ]]; do
    # Every value-taking option needs a following argument. Without this
    # check, a value-taking option given as the last word makes `shift 2`
    # fail, which ends the script silently under `set -e`.
    case "$1" in
        --wan-size|--fs-url|--http-port|--num-frames|--height|--width|--num-inference-steps)
            if [[ $# -lt 2 ]]; then
                echo "Error: option '$1' requires a value" >&2
                exit 1
            fi
            ;;
    esac

    case "$1" in
        --wan-size)
            WAN_SIZE="$2"
            shift 2
            ;;
        --fs-url)
            FS_URL="$2"
            shift 2
            ;;
        --http-port)
            HTTP_PORT="$2"
            shift 2
            ;;
        --num-frames)
            NUM_FRAMES="$2"
            shift 2
            ;;
        --height)
            HEIGHT="$2"
            shift 2
            ;;
        --width)
            WIDTH="$2"
            shift 2
            ;;
        --num-inference-steps)
            NUM_INFERENCE_STEPS="$2"
            shift 2
            ;;
        -h|--help)
            echo "Usage: $0 [OPTIONS]"
            echo ""
            echo "Launch a Dynamo T2V (text-to-video) worker with Wan models."
            echo ""
            echo "Options:"
            echo "  --wan-size <1b|14b>          Model size (default: 1b)"
            echo "  --fs-url <url>               Filesystem URL for media storage (default: file:///tmp/dynamo_media)"
            echo "  --http-port <port>            Frontend HTTP port (default: 8000)"
            echo "  --num-frames <n>              Default frame count for health check (default: 17)"
            echo "  --height <n>                  Video height (default: 480)"
            echo "  --width <n>                   Video width (default: 832)"
            echo "  --num-inference-steps <n>     Denoising steps (default: 50)"
            echo "  -h, --help                    Show this help message"
            echo ""
            echo "Additional flags are forwarded to dynamo.sglang."
            exit 0
            ;;
        *)
            # Unknown word: keep it for the worker command line.
            EXTRA_ARGS+=("$1")
            shift
            ;;
    esac
done

# Select model and TP based on size
# Maps WAN_SIZE to the model identifier (MODEL_PATH) and to TP_SIZE, which is
# handed to the worker as --tp. Any other value is rejected.
case "$WAN_SIZE" in
    1b|1B)
        MODEL_PATH="Wan-AI/Wan2.1-T2V-1.3B-Diffusers"
        TP_SIZE=1
        ;;
    14b|14B)
        MODEL_PATH="Wan-AI/Wan2.1-T2V-14B-Diffusers"
        TP_SIZE=2
        ;;
    *)
        # Diagnostics go to stderr so they are not mixed into captured stdout.
        echo "Error: --wan-size must be '1b' or '14b', got '$WAN_SIZE'" >&2
        exit 1
        ;;
esac

# Startup banner: summarises the resolved configuration and prints a
# ready-to-paste example request against the frontend.
# The here-document is unquoted so variables expand; `\\` yields the single
# trailing backslash that continues the curl command when pasted.
cat <<EOF
==========================================
Launching T2V Video Generation Worker
==========================================
Model:       $MODEL_PATH
TP Size:     $TP_SIZE
Frontend:    http://localhost:$HTTP_PORT
FS URL:      $FS_URL
Resolution:  ${WIDTH}x${HEIGHT}
==========================================

Example test command:

  curl http://localhost:${HTTP_PORT}/v1/videos \\
    -H 'Content-Type: application/json' \\
    -d '{
      "prompt": "A curious raccoon exploring a garden",
      "model": "${MODEL_PATH}",
      "seconds": 2,
      "size": "${WIDTH}x${HEIGHT}",
      "response_format": "url",
      "nvext": {
        "fps": 8,
        "num_frames": ${NUM_FRAMES},
        "num_inference_steps": ${NUM_INFERENCE_STEPS}
      }
    }'

==========================================
EOF

# Launch frontend
# Runs in the background; its PID is recorded so the cleanup trap can stop it.
echo "Starting Dynamo Frontend on port $HTTP_PORT..."
python3 -m dynamo.frontend \
    --http-port "$HTTP_PORT" &
FRONTEND_PID=$!

# Fixed pause before starting the worker.
# NOTE(review): presumably gives the frontend time to come up; there is no
# readiness check here - confirm whether 2 seconds is sufficient.
sleep 2

# Launch video generation worker
# Runs in the foreground, so the script lasts as long as the worker does.
# The option list must stay one backslash-continued command: any line without
# a trailing backslash ends the command and strands the remaining options.
echo "Starting T2V Worker ($WAN_SIZE)..."
python3 -m dynamo.sglang \
    --model-path "$MODEL_PATH" \
    --served-model-name "$MODEL_PATH" \
    --tp "$TP_SIZE" \
    --video-generation-worker \
    --media-output-fs-url "$FS_URL" \
    --trust-remote-code \
    --skip-tokenizer-init \
    --enable-metrics \
    "${EXTRA_ARGS[@]}"