online_quick_check.sh 4.88 KB
Newer Older
liuxu3's avatar
liuxu3 committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/bin/bash

# Address of the API server that run_apiserver.sh launches and that the
# benchmark client connects to.
HOST=127.0.0.1
PORT=8081
TIMEOUT=1200        # maximum time (seconds) to wait for the server port to open
INTERVAL=60         # delay (seconds) between two port probes
ENDPOINT=/v1/completions

# Load the JSON run configuration from the current working directory.
# Fail fast when jq or the file is unavailable: continuing with empty values
# would make every later step operate on empty names and paths.
if ! command -v jq >/dev/null 2>&1; then
    echo "ERR: jq is required but was not found in PATH" >&2
    exit 1
fi

if ! json_data=$(cat auto_quick_check_config.json); then
    echo "ERR: cannot read auto_quick_check_config.json" >&2
    exit 1
fi

if ! jq empty <<< "$json_data"; then
    echo "ERR: auto_quick_check_config.json is not valid JSON" >&2
    exit 1
fi

# The JSON text is always passed quoted so the shell neither word-splits it
# nor expands glob characters that appear inside string values.
DCU=$(jq -r '.DCU' <<< "$json_data")
vllm_version=$(jq -r '.vllm_version' <<< "$json_data")
pkg_version=$(jq -r '.pkg_version' <<< "$json_data")
dst_path=$(jq -r '.dst_path' <<< "$json_data")
# One compact JSON object per line, one line per entry of the "items" array.
items=$(jq -c '.items[]' <<< "$json_data")

#######################################
# Print the value of one summary line of a `vllm bench serve` log.
# The label is matched case-insensitively at the start of a line, so both
# "Total Token throughput" and "Total token throughput" (the spelling differs
# between vLLM releases) are recognised. Only the first matching line is used,
# and its last whitespace-separated field is printed.
# Arguments: $1 - line label, e.g. "Mean TTFT (ms):"
#            $2 - path of the log file
# Outputs:   the value on stdout; nothing when the label is absent
#######################################
extract_metric() {
    local label=$1
    local log_file=$2
    # -a: treat the log as text even if it contains binary bytes.
    grep -a -i -m 1 -- "^${label}" "$log_file" | awk '{print $NF}'
}

#######################################
# Print numerator / denominator rounded to two decimals.
# The operands are handed to python as arguments rather than pasted into the
# program text, so an empty metric cannot turn into a python syntax error.
# Arguments: $1 - numerator, $2 - denominator
# Outputs:   the quotient on stdout; nothing when an operand is empty
#######################################
ratio_2dp() {
    local numerator=$1
    local denominator=$2
    if [[ -z "$numerator" || -z "$denominator" ]]; then
        echo "WARN: missing operand, ratio left empty" >&2
        return 0
    fi
    python -c 'import sys; print(round(float(sys.argv[1]) / float(sys.argv[2]), 2))' \
        "$numerator" "$denominator"
}

#######################################
# Poll HOST:PORT every INTERVAL seconds until it accepts connections.
# Globals:  HOST, PORT, TIMEOUT, INTERVAL (read)
# Returns:  0 once the port is open, 1 when TIMEOUT seconds have elapsed
#######################################
wait_for_server() {
    local start_time now
    start_time=$(date +%s)
    until nc -zv "$HOST" "$PORT"; do
        now=$(date +%s)
        if (( now - start_time >= TIMEOUT )); then
            return 1
        fi
        echo "PORT ${PORT} has not been launched yet, please wait...."
        sleep "$INTERVAL"
    done
    return 0
}

#######################################
# Run one benchmark case against the running server, keep its log, and append
# one row with the parsed metrics to the model's output.csv.
# Globals:   PORT, ENDPOINT, DCU, pkg_version (read)
# Arguments: $1 model_name  $2 model_path  $3 dtype  $4 tp
#            $5 input_len   $6 output_len  $7 bs     $8 result_path
#######################################
run_benchmark_case() {
    local model_name=$1 model_path=$2 dtype=$3 tp=$4
    local input_len=$5 output_len=$6 bs=$7 result_path=$8
    local log_file="${result_path}/${model_name}-tp-${tp}-input_len-${input_len}-output_len-${output_len}-bs-${bs}.log"
    local output_throughput total_throughput TTFT_mean TPOT_mean ITL_mean duration
    local output_throughput_per_bs decode_throughput_per_bs

    # Run the benchmark client; its console output is also saved as the log.
    vllm bench serve --model "${model_name}" \
        --dataset-name random \
        --tokenizer "${model_path}" \
        --trust-remote-code \
        --port "${PORT}" \
        --endpoint "${ENDPOINT}" \
        --random-input-len "${input_len}" \
        --random-output-len "${output_len}" \
        --ignore_eos \
        --num-prompts "${bs}" \
        --max-concurrency "${bs}" 2>&1 | tee "$log_file"

    output_throughput=$(extract_metric "Output token throughput (tok/s):" "$log_file")
    total_throughput=$(extract_metric "Total Token throughput (tok/s):" "$log_file")
    TTFT_mean=$(extract_metric "Mean TTFT (ms):" "$log_file")
    TPOT_mean=$(extract_metric "Mean TPOT (ms):" "$log_file")
    ITL_mean=$(extract_metric "Mean ITL (ms):" "$log_file")
    duration=$(extract_metric "Benchmark duration (s):" "$log_file")

    # Per-request output throughput, and decode rate derived from the mean
    # time per output token (milliseconds -> tokens per second).
    output_throughput_per_bs=$(ratio_2dp "$output_throughput" "$bs")
    decode_throughput_per_bs=$(ratio_2dp 1000.0 "$TPOT_mean")

    echo "$model_name,$DCU,$tp,$dtype,$input_len,$output_len,$bs,$TTFT_mean,$TPOT_mean,$ITL_mean,$output_throughput,$total_throughput,$duration,$output_throughput_per_bs,$decode_throughput_per_bs,$pkg_version" >> "${result_path}output.csv"
}

# Every list is read into an array and iterated in the main shell. Looping
# inside an `echo ... | while read` pipeline would run the body in a subshell,
# where `exit` only ends that subshell instead of the whole script.
mapfile -t item_list <<< "${items:-}"

for item in "${item_list[@]}"; do
    # An empty "items" list still yields one empty line; skip it.
    [[ -n "$item" ]] || continue

    model_name=$(jq -r '.model_name' <<< "$item")
    model_path=$(jq -r '.model_path' <<< "$item")
    dtype=$(jq -r '.dtype' <<< "$item")
    tensor_parallel=$(jq -r '.tensor_parallel' <<< "$item")

    batch_size=$(jq -r '.batch_size' <<< "$item")
    seqlen_tuple=$(jq -r '.seqlen_tuple' <<< "$item")

    result_path=${dst_path}/${model_name}/
    mkdir -p -- "$result_path"

    # Write the CSV header only when the file is missing or empty, so repeated
    # runs keep appending to the same table.
    if [[ ! -s "${result_path}output.csv" ]]; then
        echo "model_name,DCU,DCU nums,precision,input_len,output_len,bs,TTFT_mean(ms),TPOT_mean(ms),ITL_mean(ms),GenerateThroughput(tokens/s),TotalThroughput(tokens/s),Duration(s),OutputThroughputPerBS(tokens/s),DecodeThroughputPerBS(tokens/s),version" > "${result_path}output.csv"
    fi

    mapfile -t tp_list < <(jq -c '.[]' <<< "$tensor_parallel")
    mapfile -t sq_list < <(jq -c '.[]' <<< "$seqlen_tuple")
    mapfile -t bs_list < <(jq -c '.[]' <<< "$batch_size")

    for tp in "${tp_list[@]}"; do
        # Launch the API server in the background for this tensor-parallel size.
        nohup bash run_apiserver.sh "$model_name" "$model_path" "$tp" "$dtype" "$HOST" "$PORT" "$result_path" &

        if ! wait_for_server; then
            echo "ERR:PORT ${PORT} launch time out, exit!!!。" >&2
            # Do not leave the half-started server behind.
            pkill -f vllm
            exit 1
        fi

        for sq in "${sq_list[@]}"; do
            # Each entry is a JSON string "<input_len> <output_len>"; strip the
            # surrounding quotes and split on the space.
            IFS=' ' read -r input_len output_len <<< "${sq//\"/}"
            for bs in "${bs_list[@]}"; do
                run_benchmark_case "$model_name" "$model_path" "$dtype" "$tp" \
                    "$input_len" "$output_len" "$bs" "$result_path"
                sleep 10
            done
        done

        # Stop the server before the next configuration and give it time to
        # shut down.
        # NOTE(review): `pkill -f vllm` matches every process whose command
        # line contains "vllm" — confirm nothing unrelated runs on this host.
        pkill -f vllm
        sleep 60
    done
done