online_quick_check_maxbs.sh 6.64 KB
Newer Older
liuxu3's avatar
liuxu3 committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#!/bin/bash
#
# Searches for the largest batch size (concurrency) each configured model can
# serve while staying within its TTFT/TPOT thresholds.
#
# Reads auto_quick_check_config.json from the current working directory and
# uses jq, nc, python3, run_apiserver.sh and benchmarks/benchmark_serving.py.

HOST=127.0.0.1
PORT=8081
TIMEOUT=1200        # maximum number of seconds to wait for the server port to open
INTERVAL=60         # seconds to sleep between two port checks

# Load the JSON configuration file once; fail fast when it is unreadable so
# the rest of the script never runs with empty settings.
if ! json_data=$(cat auto_quick_check_config.json); then
    echo "ERR: cannot read auto_quick_check_config.json" >&2
    exit 1
fi

# The JSON text is always passed quoted: an unquoted expansion would be
# word-split and glob-expanded by the shell before jq sees it.
DCU=$(jq -r '.DCU' <<< "$json_data")
vllm_version=$(jq -r '.vllm_version' <<< "$json_data")
pkg_version=$(jq -r '.pkg_version' <<< "$json_data")
dst_path=$(jq -r '.dst_path' <<< "$json_data")

# One compact JSON object per line, one line per test item.
if ! items=$(jq -c '.items[]' <<< "$json_data"); then
    echo "ERR: auto_quick_check_config.json has no usable .items list" >&2
    exit 1
fi

# ---------------------------------------------------------------------------
# Helpers used by the max-batch-size search loop below.
# ---------------------------------------------------------------------------

# Succeeds only when all three counts are equal.
# Arguments: $1, $2, $3 - element counts of seqlen_tuple, ttft_thres, tpot_thres
_lengths_match() {
    [[ "$1" -eq "$2" && "$1" -eq "$3" ]]
}

# Succeeds when $1 is a decimal integer greater than zero.
_is_positive_int() {
    [[ "$1" =~ ^[0-9]+$ ]] && (( 10#$1 > 0 ))
}

# Prints one whitespace-separated field of every line that starts with a label.
# Arguments: $1 - log file, $2 - label (grep basic regex, anchored at line
#            start), $3 - 1-based field number to print
_metric() {
    grep -a -- "^$2" "$1" | awk -v field="$3" '{ print $field }'
}

# Succeeds when both measured latencies are within their thresholds.
# Arguments: $1 - mean TTFT, $2 - mean TPOT, $3 - TTFT threshold,
#            $4 - TPOT threshold
# Values are parsed with Python's float(), so "inf" works as a threshold.
# A value that cannot be parsed counts as "not within thresholds". The values
# travel as argv instead of being spliced into the Python source text.
_within_thresholds() {
    python3 - "$@" <<'PY'
import sys
try:
    ttft, tpot, ttft_thre, tpot_thre = (float(v) for v in sys.argv[1:5])
except ValueError:
    sys.exit(1)
sys.exit(0 if (ttft <= ttft_thre and tpot <= tpot_thre) else 1)
PY
}

# Polls localhost until the port accepts connections.
# Arguments: $1 - port, $2 - timeout in seconds, $3 - seconds between checks
# Returns:   0 once the port is open, 1 when the timeout has elapsed
_wait_for_port() {
    local port=$1 timeout=$2 interval=$3
    local started now
    started=$(date +%s)
    until nc -zv localhost "$port"; do
        now=$(date +%s)
        if (( now - started >= timeout )); then
            return 1
        fi
        echo "PORT ${port} has not been launched yet, please wait...."
        sleep "$interval"
    done
}

# ---------------------------------------------------------------------------
# For every item, and every tensor-parallel size of that item, start the API
# server, then for each sequence-length entry raise the batch size step by
# step until the TTFT/TPOT thresholds are exceeded. Every run is appended to
# <dst_path>/<model_name>/output.csv.
# ---------------------------------------------------------------------------
while IFS= read -r item; do
    # The here-string yields one empty line when there are no items at all.
    [[ -n "$item" ]] || continue

    model_name=$(jq -r '.model_name' <<< "$item")
    model_path=$(jq -r '.model_path' <<< "$item")
    dtype=$(jq -r '.dtype' <<< "$item")
    tensor_parallel=$(jq -r '.tensor_parallel' <<< "$item")

    seqlen_len=$(jq -r '.seqlen_tuple | length' <<< "$item")
    ttft_len=$(jq -r '.ttft_thres | length' <<< "$item")
    tpot_len=$(jq -r '.tpot_thres | length' <<< "$item")

    # All three lists are indexed in parallel, so their lengths must agree.
    if ! _lengths_match "$seqlen_len" "$ttft_len" "$tpot_len"; then
        echo "***********************************"
        echo "测试项:模型 ${model_name} "
        echo "输入输出序列、ttft阈值、tpot阈值数量存在不一致,无法测试最大并发量"
        echo "跳过该测试项"
        echo "***********************************"
        continue
    fi

    # These do not change per sequence length, so read them once per item.
    # A zero or non-numeric step would make the search loop below endless.
    bs_start=$(jq -r '.batch_size_start' <<< "$item")
    bs_interval=$(jq -r '.batch_size_interval' <<< "$item")
    if ! _is_positive_int "$bs_start" || ! _is_positive_int "$bs_interval"; then
        echo "***********************************"
        echo "Test item: model ${model_name}"
        echo "batch_size_start and batch_size_interval must be positive integers"
        echo "Skipping this test item"
        echo "***********************************"
        continue
    fi
    # Force base 10 so a value such as "08" is not parsed as octal.
    bs_start=$((10#$bs_start))
    bs_interval=$((10#$bs_interval))

    result_path=${dst_path}/${model_name}/
    if ! mkdir -p -- "$result_path"; then
        echo "ERR: cannot create result directory ${result_path}" >&2
        continue
    fi

    # Write the CSV header only when the file is missing or empty.
    csv_file=${result_path}output.csv
    if [[ ! -s "$csv_file" ]]; then
        echo "model_name,DCU,DCU nums,precision,input_len,output_len,bs,TTFT_mean(ms),TPOT_mean(ms),ITL_mean(ms),GenerateThroughput(tokens/s),TotalThroughput(tokens/s),Duration(s),version" > "$csv_file"
    fi

    # The loop is fed by process substitution rather than a pipeline so that
    # it runs in the main shell and "exit" really terminates the script.
    while IFS= read -r tp; do
        # Launch the API server in the background.
        nohup bash run_apiserver.sh "$model_name" "$model_path" "$tp" "$dtype" "$HOST" "$PORT" "$result_path" &

        if ! _wait_for_port "$PORT" "$TIMEOUT" "$INTERVAL"; then
            echo "ERR:PORT ${PORT} launch time out, exit!!!。"
            # Do not leave a half-started server behind.
            pkill -f vllm
            exit 1
        fi

        for ((i = 0; i < seqlen_len; i++)); do
            seqlen=$(jq -r --argjson i "$i" '.seqlen_tuple[$i]' <<< "$item")
            ttft_thre=$(jq -r --argjson i "$i" '.ttft_thres[$i]' <<< "$item")
            tpot_thre=$(jq -r --argjson i "$i" '.tpot_thres[$i]' <<< "$item")

            # seqlen is split on spaces: first field is the input length,
            # second field is the output length.
            IFS=' ' read -ra seq_parts <<< "$seqlen"
            input_len=${seq_parts[0]}
            output_len=${seq_parts[1]}

            # With both thresholds infinite the search would never stop.
            if [[ "$ttft_thre" == "inf" && "$tpot_thre" == "inf" ]]; then
                echo "***********************************"
                echo "测试项:模型 ${model_name} 输入 ${input_len} 输出 ${output_len}"
                echo "需要设置ttft或tpot阈值(不能都为inf),否则无法测试最大并发量"
                echo "跳过该测试项"
                echo "***********************************"
                continue
            fi

            bs=$bs_start
            while true; do
                log_file="${result_path}${model_name}-tp-${tp}-input_len-${input_len}-output_len-${output_len}-ttft_thre-${ttft_thre}-tpot_thre-${tpot_thre}-bs-${bs}.log"

                # Run the benchmark. stdin comes from /dev/null so it cannot
                # consume the input of the surrounding "while read" loops.
                python3 benchmarks/benchmark_serving.py \
                    --dataset-name random \
                    --tokenizer "$model_path" \
                    --trust-remote-code \
                    --model "$model_name" \
                    --port "$PORT" \
                    --random-input-len "$input_len" \
                    --random-output-len "$output_len" \
                    --ignore_eos \
                    --num-prompts "$bs" \
                    --max-concurrency "$bs" < /dev/null 2>&1 | tee "$log_file"

                output_throughput=$(_metric "$log_file" "Output token throughput (tok/s):" 5)
                total_throughput=$(_metric "$log_file" "Total Token throughput (tok/s):" 5)
                TTFT_mean=$(_metric "$log_file" "Mean TTFT (ms):" 4)
                TPOT_mean=$(_metric "$log_file" "Mean TPOT (ms):" 4)
                ITL_mean=$(_metric "$log_file" "Mean ITL (ms):" 4)
                duration=$(_metric "$log_file" "Benchmark duration (s):" 4)

                echo "$model_name,$DCU,$tp,$dtype,$input_len,$output_len,$bs,$TTFT_mean,$TPOT_mean,$ITL_mean,$output_throughput,$total_throughput,$duration,$pkg_version" >> "$csv_file"
                sleep 10

                # Keep growing the batch size while both latencies stay
                # within their thresholds; stop at the first violation.
                if _within_thresholds "$TTFT_mean" "$TPOT_mean" "$ttft_thre" "$tpot_thre"; then
                    bs=$((bs + bs_interval))
                else
                    break
                fi
            done
        done

        # Stop the server and give it time to shut down before the next TP size.
        pkill -f vllm
        sleep 60
    done < <(jq -c '.[]' <<< "$tensor_parallel")

done <<< "$items"