#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Block until the serving endpoint reports the expected number of live workers.
#
# Polls http://<model_host>:<model_port>/health with curl and feeds the response
# body on stdin to /scripts/check_server_health.py. The model is considered
# ready as soon as that script's output contains "Model is ready.".
#
# Arguments:
#   $1 - model_host:   host serving the /health endpoint
#   $2 - model_port:   port serving the /health endpoint
#   $3 - n_prefill:    expected number of prefill workers (default: 1)
#   $4 - n_decode:     expected number of decode workers (default: 1)
#   $5 - poll:         seconds to sleep between polls (default: 1)
#   $6 - timeout:      seconds to wait before giving up (default: 600)
#   $7 - report_every: seconds between progress reports (default: 60)
# Outputs:
#   Progress and the final status message on stdout.
# Returns:
#   0 once the model is ready. On timeout it calls `exit 2`, which terminates
#   the calling shell when this file is sourced (kept for existing callers).
wait_for_model() {
    local model_host=$1
    local model_port=$2
    local n_prefill=${3:-1}
    local n_decode=${4:-1}
    local poll=${5:-1}
    local timeout=${6:-600}
    local report_every=${7:-60}

    local health_addr="http://${model_host}:${model_port}/health"
    echo "Polling ${health_addr} every ${poll} seconds to check whether ${n_prefill} prefills and ${n_decode} decodes are alive"

    # Declared separately from assignment so `local` does not mask exit codes,
    # and so none of the loop's scratch variables leak into the caller.
    local start_ts report_ts time_now curl_result check_result
    start_ts=$(date +%s)
    report_ts=$start_ts

    while :; do
        # Curl timeout - our primary use case here is to launch it at the first node (localhost), so no timeout is needed.
        # A refused connection is expected while the server is still starting;
        # `|| true` keeps polling even when the caller runs under `set -e`.
        curl_result=$(curl "${health_addr}" 2>/dev/null) || true

        # Python path - Use of `check_server_health.py` is self-constrained outside of any packaging.
        # A non-zero exit from the checker is treated as "not ready yet".
        check_result=$(python3 /scripts/check_server_health.py "$n_prefill" "$n_decode" <<< "$curl_result") || true
        if [[ $check_result == *"Model is ready."* ]]; then
            # Quoted so the checker's message is printed verbatim (no word
            # splitting or glob expansion).
            printf '%s\n' "$check_result"
            return 0
        fi

        time_now=$(date +%s)
        if (( time_now - start_ts >= timeout )); then
            echo "Model did not get healthy in ${timeout} seconds"
            exit 2
        fi

        # Periodically surface the checker's latest output so long waits are
        # not silent.
        if (( time_now - report_ts >= report_every )); then
            printf '%s\n' "$check_result"
            report_ts=$time_now
        fi

        sleep "$poll"
    done
}

# Run one sglang.bench_serving pass against the service as a warm-up.
#
# Arguments:
#   $1 - service_host:      host of the serving endpoint
#   $2 - service_port:      port of the serving endpoint
#   $3 - served_model_name: value passed to --model
#   $4 - model_path:        value passed to --tokenizer
#   $5 - config:            'x'-separated tuple
#                           <isl>x<osl>x<num_prompts>x<concurrency>x<request_rate>
# Outputs:
#   Echoes the config and the command line, then the benchmark's own output.
# Returns:
#   The exit status of the benchmark command, or 1 if config is malformed.
warmup_model() {
    local service_host=$1
    local service_port=$2
    local served_model_name=$3
    local model_path=$4
    local config=$5

    # Split the config on 'x'; the IFS override applies to this `read` only.
    local -a config_list=()
    IFS='x' read -r -a config_list <<< "$config"
    local isl=${config_list[0]:-}
    local osl=${config_list[1]:-}
    local num_prompts=${config_list[2]:-}
    local concurrency=${config_list[3]:-}
    local request_rate=${config_list[4]:-}

    # Fail early with a clear message rather than handing the benchmark
    # options that are missing their values.
    local field
    for field in "$isl" "$osl" "$num_prompts" "$concurrency" "$request_rate"; do
        if [[ -z "$field" ]]; then
            echo "Invalid config '${config}': expected <isl>x<osl>x<num_prompts>x<concurrency>x<request_rate>" >&2
            return 1
        fi
    done

    # Built as an array with every element quoted so values containing spaces
    # or glob characters reach the benchmark as single arguments.
    local -a cmd=(
        python3 -m sglang.bench_serving
        --base-url "http://${service_host}:${service_port}"
        --model "${served_model_name}" --tokenizer "${model_path}"
        --backend sglang-oai
        --dataset-name random --random-input "${isl}" --random-output "${osl}"
        --random-range-ratio 1
        --num-prompts "${num_prompts}" --request-rate "${request_rate}" --max-concurrency "${concurrency}"
    )

    echo "Config ${config}. Running command ${cmd[*]}"

    "${cmd[@]}"
}