#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Text-to-video generation with Wan2.1 models.
# GPUs: 1 (--wan-size 1b) or 2 (--wan-size 14b)

# Abort on the first command that exits with a non-zero status.
set -e
# On any exit, signal every process in this script's process group (kill 0)
# so the background processes started below do not outlive the script.
trap 'echo Cleaning up...; kill 0' EXIT

# Resolve the directory containing this script (symlinks followed) and load
# the shared launch helpers relative to it.
SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
source "$SCRIPT_DIR/../../../common/launch_utils.sh"

# Defaults (each one can be overridden by the matching command-line flag)
WAN_SIZE="1b"
FS_URL="file:///tmp/dynamo_media"
# Port precedence: DYN_HTTP_PORT, then a pre-set HTTP_PORT, then 8000.
HTTP_PORT="${DYN_HTTP_PORT:-${HTTP_PORT:-8000}}"
NUM_FRAMES=17
HEIGHT=480
WIDTH=832
NUM_INFERENCE_STEPS=50

# Parse command line arguments
#
# Recognised flags overwrite the defaults above; anything unrecognised is
# collected in EXTRA_ARGS and later forwarded to dynamo.sglang unchanged.

# Abort with a readable error when a value-taking option is the last word on
# the command line. Without this check `shift 2` fails and `set -e` ends the
# script without printing anything.
#   $1 - the option name being parsed
#   $2 - number of positional arguments still remaining (including the option)
_require_option_value() {
    if [[ "$2" -lt 2 ]]; then
        echo "Error: $1 requires a value" >&2
        exit 1
    fi
}

EXTRA_ARGS=()
while [[ $# -gt 0 ]]; do
    case "$1" in
        --wan-size)
            _require_option_value "$1" "$#"
            WAN_SIZE="$2"
            shift 2
            ;;
        --fs-url)
            _require_option_value "$1" "$#"
            FS_URL="$2"
            shift 2
            ;;
        --http-port)
            _require_option_value "$1" "$#"
            HTTP_PORT="$2"
            shift 2
            ;;
        --num-frames)
            _require_option_value "$1" "$#"
            NUM_FRAMES="$2"
            shift 2
            ;;
        --height)
            _require_option_value "$1" "$#"
            HEIGHT="$2"
            shift 2
            ;;
        --width)
            _require_option_value "$1" "$#"
            WIDTH="$2"
            shift 2
            ;;
        --num-inference-steps)
            _require_option_value "$1" "$#"
            NUM_INFERENCE_STEPS="$2"
            shift 2
            ;;
        -h|--help)
            echo "Usage: $0 [OPTIONS]"
            echo ""
            echo "Launch a Dynamo T2V (text-to-video) worker with Wan models."
            echo ""
            echo "Options:"
            echo "  --wan-size <1b|14b>          Model size (default: 1b)"
            echo "  --fs-url <url>               Filesystem URL for media storage (default: file:///tmp/dynamo_media)"
            echo "  --http-port <port>            Frontend HTTP port (default: 8000)"
            echo "  --num-frames <n>              Default frame count for health check (default: 17)"
            echo "  --height <n>                  Video height (default: 480)"
            echo "  --width <n>                   Video width (default: 832)"
            echo "  --num-inference-steps <n>     Denoising steps (default: 50)"
            echo "  -h, --help                    Show this help message"
            echo ""
            echo "Additional flags are forwarded to dynamo.sglang."
            exit 0
            ;;
        *)
            # Unknown argument: keep it verbatim for the worker command line.
            EXTRA_ARGS+=("$1")
            shift
            ;;
    esac
done

# Select model and TP based on size
#
# MODEL_PATH is the model identifier handed to the worker and TP_SIZE is the
# value passed to its --tp flag (1 for the 1.3B model, 2 for the 14B model).
# Any other size is rejected before anything is launched.
case "$WAN_SIZE" in
    1b|1B)
        MODEL_PATH="Wan-AI/Wan2.1-T2V-1.3B-Diffusers"
        TP_SIZE=1
        ;;
    14b|14B)
        MODEL_PATH="Wan-AI/Wan2.1-T2V-14B-Diffusers"
        TP_SIZE=2
        ;;
    *)
        # Diagnostics belong on stderr so they are not mixed into stdout.
        echo "Error: --wan-size must be '1b' or '14b', got '$WAN_SIZE'" >&2
        exit 1
        ;;
esac

# Print the startup banner with the resolved configuration. --no-curl is
# passed here and the example request is printed separately just below.
# print_launch_banner and print_curl_footer are not defined in this script;
# presumably they come from the sourced launch_utils.sh — confirm there.
print_launch_banner --no-curl "Launching T2V Video Generation Worker" "$MODEL_PATH" "$HTTP_PORT" \
    "TP Size:     $TP_SIZE" \
    "FS URL:      $FS_URL" \
    "Resolution:  ${WIDTH}x${HEIGHT}"

# Example request fed to print_curl_footer on stdin. The heredoc delimiter is
# unquoted, so ${...} values are expanded and each "\\" is emitted as a single
# backslash line continuation.
# NOTE(review): EXAMPLE_PROMPT_VISUAL is not set in this script; presumably
# it is provided by launch_utils.sh — confirm.
print_curl_footer <<CURL
  curl http://localhost:${HTTP_PORT}/v1/videos \\
    -H 'Content-Type: application/json' \\
    -d '{
      "prompt": "${EXAMPLE_PROMPT_VISUAL}",
      "model": "${MODEL_PATH}",
      "seconds": 2,
      "size": "${WIDTH}x${HEIGHT}",
      "response_format": "url",
      "nvext": {
        "fps": 8,
        "num_frames": ${NUM_FRAMES},
        "num_inference_steps": ${NUM_INFERENCE_STEPS}
      }
    }'
CURL

# Launch frontend
# Started in the background so the worker can be launched from the same shell.
echo "Starting Dynamo Frontend on port $HTTP_PORT..."
python3 -m dynamo.frontend \
    --http-port "$HTTP_PORT" &

# Short pause before starting the worker.
# NOTE(review): presumably this gives the frontend time to come up — confirm.
sleep 2

# Launch video generation worker
# The model is served under its own path as the model name. Any command-line
# arguments this script did not recognise are appended last via EXTRA_ARGS.
echo "Starting T2V Worker ($WAN_SIZE)..."
python3 -m dynamo.sglang \
    --model-path "$MODEL_PATH" \
    --served-model-name "$MODEL_PATH" \
    --tp "$TP_SIZE" \
    --video-generation-worker \
    --media-output-fs-url "$FS_URL" \
    --trust-remote-code \
    --skip-tokenizer-init \
    --enable-metrics \
    "${EXTRA_ARGS[@]}" &

# Exit on first worker failure; kill 0 in the EXIT trap tears down the rest
wait_any_exit