#!/bin/bash

# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Default settings. Each one can be overridden by the command-line flag
# named in its trailing comment.
NUM_WORKERS=8                                          # --num-workers N
MODEL_PATH="deepseek-ai/DeepSeek-R1-Distill-Llama-8B"  # --model-path PATH
TENSOR_PARALLEL_SIZE=1                                 # --tensor-parallel-size N
USE_MOCKERS=false                                      # --mockers
USE_PREFILLS=false                                     # --prefills
BASE_GPU_OFFSET=0                                      # --base-gpu-offset N
# Any argument that is not one of the flags above is collected here and
# forwarded verbatim to the engine command line.
EXTRA_ARGS=()

# Parse arguments
#
# parse_args ARG...
#   Walks the given arguments and updates the global settings:
#     --num-workers N            -> NUM_WORKERS
#     --model-path PATH          -> MODEL_PATH
#     --tensor-parallel-size N   -> TENSOR_PARALLEL_SIZE
#     --base-gpu-offset N        -> BASE_GPU_OFFSET
#     --mockers                  -> USE_MOCKERS=true
#     --prefills                 -> USE_PREFILLS=true
#     --                         -> everything after it is appended to EXTRA_ARGS
#     anything else              -> appended to EXTRA_ARGS
#   Exits with status 1 (message on stderr) when a value-taking flag is the
#   last argument and therefore has no value.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --num-workers | --model-path | --tensor-parallel-size | --base-gpu-offset)
                # Without this guard, `shift 2` fails without shifting when
                # only the flag is left, and the loop would spin forever.
                if [[ $# -lt 2 ]]; then
                    echo "Error: $1 requires a value" >&2
                    exit 1
                fi
                case "$1" in
                    --num-workers) NUM_WORKERS="$2" ;;
                    --model-path) MODEL_PATH="$2" ;;
                    --tensor-parallel-size) TENSOR_PARALLEL_SIZE="$2" ;;
                    --base-gpu-offset) BASE_GPU_OFFSET="$2" ;;
                esac
                shift 2
                ;;
            --mockers)
                USE_MOCKERS=true
                shift
                ;;
            --prefills)
                USE_PREFILLS=true
                shift
                ;;
            --)
                # Explicit separator: the rest goes to the engine untouched.
                shift
                EXTRA_ARGS+=("$@")
                break
                ;;
            *)
                # Collect all other arguments as vLLM/mocker arguments
                EXTRA_ARGS+=("$1")
                shift
                ;;
        esac
    done
}

parse_args "$@"

# Fall back to built-in engine arguments when the caller passed none.
if (( ${#EXTRA_ARGS[@]} == 0 )); then
    case "$USE_MOCKERS" in
        true)
            # Mocker engine: only the block size is set explicitly.
            EXTRA_ARGS=("--block-size" "64")
            ;;
        *)
            # vLLM engine: eager mode, batching/context limits and block size.
            EXTRA_ARGS=(
                "--enforce-eager"
                "--max-num-batched-tokens" "16384"
                "--max-model-len" "32768"
                "--block-size" "64"
            )
            ;;
    esac
fi

# Validate arguments
#
# Each numeric setting must consist only of decimal digits. Once a value has
# passed the regex it is re-read as base 10 (10#...), so a leading zero such
# as "08" is not later rejected as invalid octal by $(( )) arithmetic.
# Errors go to stderr and the script exits with status 1.
if ! [[ "$NUM_WORKERS" =~ ^[0-9]+$ ]] || [ "$((10#$NUM_WORKERS))" -lt 1 ]; then
    echo "Error: NUM_WORKERS must be a positive integer" >&2
    exit 1
fi
NUM_WORKERS=$((10#$NUM_WORKERS))

if ! [[ "$TENSOR_PARALLEL_SIZE" =~ ^[0-9]+$ ]] || [ "$((10#$TENSOR_PARALLEL_SIZE))" -lt 1 ]; then
    echo "Error: TENSOR_PARALLEL_SIZE must be a positive integer" >&2
    exit 1
fi
TENSOR_PARALLEL_SIZE=$((10#$TENSOR_PARALLEL_SIZE))

if ! [[ "$BASE_GPU_OFFSET" =~ ^[0-9]+$ ]]; then
    echo "Error: BASE_GPU_OFFSET must be a non-negative integer" >&2
    exit 1
fi
BASE_GPU_OFFSET=$((10#$BASE_GPU_OFFSET))

# Calculate total GPUs needed
# One contiguous range of GPU indices is reported: NUM_WORKERS groups of
# TENSOR_PARALLEL_SIZE devices, starting at BASE_GPU_OFFSET.
TOTAL_GPUS_NEEDED=$((NUM_WORKERS * TENSOR_PARALLEL_SIZE))
LAST_GPU=$((BASE_GPU_OFFSET + TOTAL_GPUS_NEEDED - 1))

# Labels for the summary, derived from the two boolean switches.
if [ "$USE_MOCKERS" = true ]; then
    engine_label="Mocker"
else
    engine_label="vLLM"
fi
if [ "$USE_PREFILLS" = true ]; then
    worker_label="Prefill"
else
    worker_label="Decode"
fi

# Print the effective configuration before anything is launched.
echo "Configuration:"
echo "  Engine Type: $engine_label"
echo "  Worker Type: $worker_label"
echo "  Workers: $NUM_WORKERS"
echo "  Model: $MODEL_PATH"
echo "  Tensor Parallel Size: $TENSOR_PARALLEL_SIZE"
echo "  Total GPUs needed: $TOTAL_GPUS_NEEDED"
echo "  GPU Range: $BASE_GPU_OFFSET-$LAST_GPU"
echo "  Engine args: ${EXTRA_ARGS[*]}"
echo ""

# PIDs of the background workers; filled in as each worker is launched.
PIDS=()

# cleanup
#   Signal handler: announces shutdown, sends the default kill signal to
#   every PID recorded in PIDS, waits for background jobs to finish and
#   exits with status 0.
cleanup() {
    printf '\n%s\n' "Stopping all workers..."
    # Errors from kill (e.g. a worker that already exited) are discarded.
    kill "${PIDS[@]}" 2>/dev/null
    wait
    exit 0
}

# Run cleanup on Ctrl+C and on termination requests.
trap cleanup INT TERM

# Lower-case role name used in log messages: "prefill" when --prefills was
# given, "decode" otherwise.
if [ "$USE_PREFILLS" = true ]; then
    WORKER_TYPE="prefill"
else
    WORKER_TYPE="decode"
fi
echo "Starting $NUM_WORKERS $WORKER_TYPE workers..."
# build_gpu_devices START COUNT
#   Prints a comma-separated list of COUNT consecutive GPU indices beginning
#   at START (e.g. "2,3,4" for START=2, COUNT=3), suitable as a
#   CUDA_VISIBLE_DEVICES value.
build_gpu_devices() {
    local start=$1 count=$2
    local devices=$start
    local gpu
    for ((gpu = start + 1; gpu < start + count; gpu++)); do
        devices+=",$gpu"
    done
    printf '%s\n' "$devices"
}

# Read the worker count as base 10 so a leading zero is not taken as octal.
worker_count=$((10#${NUM_WORKERS:-0}))

for ((i = 1; i <= worker_count; i++)); do
    {
        echo "[${WORKER_TYPE^} Worker-$i] Starting..."

        if [ "$USE_MOCKERS" = true ]; then
            # Run mocker engine (no GPU assignment needed)
            exec python -m dynamo.mocker \
                --model-path "$MODEL_PATH" \
                --endpoint dyn://test.mocker.generate \
                "${EXTRA_ARGS[@]}"
        else
            # Calculate GPU indices for this worker (with base offset):
            # worker i gets TENSOR_PARALLEL_SIZE consecutive devices.
            START_GPU=$((BASE_GPU_OFFSET + (i - 1) * TENSOR_PARALLEL_SIZE))
            GPU_DEVICES=$(build_gpu_devices "$START_GPU" "$TENSOR_PARALLEL_SIZE")
            echo "[${WORKER_TYPE^} Worker-$i] Using GPUs: $GPU_DEVICES"

            VLLM_ARGS=(
                "--model" "$MODEL_PATH"
                "--tensor-parallel-size" "$TENSOR_PARALLEL_SIZE"
            )
            if [ "$USE_PREFILLS" = true ]; then
                VLLM_ARGS+=("--is-prefill-worker")
            fi
            VLLM_ARGS+=("${EXTRA_ARGS[@]}")

            # Run vLLM engine with PYTHONHASHSEED=0 for deterministic event IDs in KV-aware routing
            exec env PYTHONHASHSEED=0 CUDA_VISIBLE_DEVICES="$GPU_DEVICES" python -m dynamo.vllm \
                "${VLLM_ARGS[@]}"
        fi
    } &
    # Because the group ends in `exec`, $! is the PID of the engine process.
    PIDS+=("$!")
    echo "Started $WORKER_TYPE worker $i (PID: $!)"
done

echo "All workers started. Press Ctrl+C to stop."
# Block until every background worker has exited.
wait
echo "All workers completed."