#!/bin/bash
#
# Quick-check benchmark driver.
#
# Reads auto_quick_check_config.json from the current directory. For every
# entry of .items and every value of that entry's .tensor_parallel list it:
#   1. starts run_apiserver.sh in the background,
#   2. polls the server port with nc until it opens (aborts after TIMEOUT s),
#   3. for each sequence-length case, runs the benchmark client with a growing
#      batch size until mean TTFT or mean TPOT exceeds its threshold,
#   4. appends one CSV row per run to <dst_path>/<model_name>/output.csv,
#   5. stops the server with `pkill -f vllm` and sleeps 60 s.
#
# External commands used: jq, nc, python, pkill, run_apiserver.sh.
#
# NOTE(review): this file was recovered from a copy in which all line breaks
# were collapsed and one span of text was lost (see run_benchmark_client and
# sweep_batch_sizes). The reconstructed parts are marked NOTE(review)/TODO.

HOST=127.0.0.1
PORT=8081
TIMEOUT=1200 # seconds to wait for the server port before giving up
INTERVAL=60  # seconds between two port checks
ENDPOINT=/v1/completions
CONFIG_FILE=auto_quick_check_config.json
CSV_HEADER="model_name,DCU,DCU nums,precision,input_len,output_len,bs,TTFT_mean(ms),TPOT_mean(ms),ITL_mean(ms),GenerateThroughput(tokens/s),TotalThroughput(tokens/s),Duration(s),OutputThroughputPerBS(tokens/s),DecodeThroughputPerBS(tokens/s),version"

#######################################
# Print one whitespace-separated field of the first log line that starts
# with a given label.
# Arguments: $1 - label (used as a grep pattern anchored at line start)
#            $2 - 1-based field number to print
#            $3 - log file
# Outputs:   the field on stdout; nothing when no line matches
#######################################
extract_metric() {
  local label=$1 field=$2 log_file=$3
  grep -a -- "^${label}" "$log_file" | awk -v f="$field" '{ print $f; exit }'
}

#######################################
# Print round($1 / $2, 2).
# Values are handed to Python through argv instead of being pasted into the
# program text, so an empty or malformed metric cannot become Python code.
# Outputs: the ratio on stdout; nothing when an operand is not a number or
#          the divisor is zero (e.g. the benchmark run produced no metrics)
#######################################
rounded_ratio() {
  python -c '
import sys
try:
    print(round(float(sys.argv[1]) / float(sys.argv[2]), 2))
except (ValueError, ZeroDivisionError):
    pass
' "$1" "$2"
}

#######################################
# Decide whether a run stayed within its latency thresholds.
# Arguments: $1 - mean TTFT, $2 - mean TPOT,
#            $3 - TTFT threshold, $4 - TPOT threshold
# Outputs:   1 when TTFT <= threshold and TPOT <= threshold, otherwise 0
#            (also 0 when any value cannot be parsed as a float)
#######################################
within_thresholds() {
  python -c '
import sys
try:
    ttft, tpot, ttft_thre, tpot_thre = (float(v) for v in sys.argv[1:5])
    print(1 if (ttft <= ttft_thre and tpot <= tpot_thre) else 0)
except Exception:
    print(0)
' "$1" "$2" "$3" "$4"
}

#######################################
# Poll localhost:$PORT every $INTERVAL seconds until it accepts connections.
# Globals: PORT, TIMEOUT, INTERVAL (read)
# Returns: 0 once the port is open, 1 after $TIMEOUT seconds without success
#######################################
wait_for_port() {
  local start_time current_time elapsed_time
  start_time=$(date +%s)
  while true; do
    if nc -zv localhost "$PORT"; then # is the port open yet?
      return 0
    fi
    current_time=$(date +%s)
    elapsed_time=$((current_time - start_time))
    if [[ "$elapsed_time" -ge "$TIMEOUT" ]]; then
      return 1
    fi
    echo "PORT ${PORT} has not been launched yet, please wait...."
    sleep "$INTERVAL"
  done
}

#######################################
# Run the benchmark client once and write its combined output to a log file
# (also echoed to stdout through tee).
#
# NOTE(review): the original client invocation was lost from the recovered
# source; only its tail ("2>&1 | tee <log>") survived. The script name and
# every flag below are a reconstruction based on the log labels this script
# greps for and on the otherwise unused HOST/PORT/ENDPOINT settings.
# TODO: confirm the script name, the flags, and whether --model should receive
# the model path or the served model name.
#
# Globals:   HOST, PORT, ENDPOINT (read)
# Arguments: $1 - model path, $2 - input length, $3 - output length,
#            $4 - batch size, $5 - log file
#######################################
run_benchmark_client() {
  local model_path=$1 input_len=$2 output_len=$3 bs=$4 log_file=$5
  python benchmark_serving.py \
    --backend vllm \
    --host "$HOST" \
    --port "$PORT" \
    --endpoint "$ENDPOINT" \
    --model "$model_path" \
    --dataset-name random \
    --random-input-len "$input_len" \
    --random-output-len "$output_len" \
    --num-prompts "$bs" \
    --ignore-eos \
    2>&1 | tee "$log_file"
}

#######################################
# For every sequence-length case of one config item, benchmark with a growing
# batch size until a latency threshold is exceeded, appending one CSV row per
# run.
# Globals:   DCU, pkg_version (read)
# Arguments: $1 - item JSON, $2 - model name, $3 - model path, $4 - dtype,
#            $5 - tensor-parallel size, $6 - result directory (ends with "/")
#######################################
sweep_batch_sizes() {
  local item=$1 model_name=$2 model_path=$3 dtype=$4 tp=$5 result_path=$6
  local seqlen_len i input_len output_len ttft_thre tpot_thre bs bs_interval
  local log_file output_throughput total_throughput TTFT_mean TPOT_mean
  local ITL_mean duration output_throughput_per_bs decode_throughput_per_bs
  local condition

  seqlen_len=$(jq -r '.seqlen_tuple | length' <<<"$item")

  # NOTE(review): the loop header and the six assignments below were lost from
  # the recovered source and are reconstructed from how the variables are used
  # afterwards.
  for ((i = 0; i < seqlen_len; i++)); do
    # assumes each .seqlen_tuple entry is an [input_len, output_len] pair
    # — TODO confirm
    input_len=$(jq -r --argjson i "$i" '.seqlen_tuple[$i][0]' <<<"$item")
    output_len=$(jq -r --argjson i "$i" '.seqlen_tuple[$i][1]' <<<"$item")
    ttft_thre=$(jq -r --argjson i "$i" '.ttft_thres[$i]' <<<"$item")
    tpot_thre=$(jq -r --argjson i "$i" '.tpot_thres[$i]' <<<"$item")
    # Starting batch size and step are unknown — TODO confirm. They can be
    # overridden through the BS_START / BS_INTERVAL environment variables.
    bs=${BS_START:-1}
    bs_interval=${BS_INTERVAL:-1}

    while true; do
      log_file="${result_path}/${model_name}-tp-${tp}-input_len-${input_len}-output_len-${output_len}-ttft_thre-${ttft_thre}-tpot_thre-${tpot_thre}-bs-${bs}.log"
      run_benchmark_client "$model_path" "$input_len" "$output_len" "$bs" "$log_file"

      output_throughput=$(extract_metric "Output token throughput (tok/s):" 5 "$log_file")
      total_throughput=$(extract_metric "Total Token throughput (tok/s):" 5 "$log_file")
      TTFT_mean=$(extract_metric "Mean TTFT (ms):" 4 "$log_file")
      TPOT_mean=$(extract_metric "Mean TPOT (ms):" 4 "$log_file")
      ITL_mean=$(extract_metric "Mean ITL (ms):" 4 "$log_file")
      duration=$(extract_metric "Benchmark duration (s):" 4 "$log_file")

      output_throughput_per_bs=$(rounded_ratio "$output_throughput" "$bs")
      decode_throughput_per_bs=$(rounded_ratio 1000.0 "$TPOT_mean")

      echo "$model_name,$DCU,$tp,$dtype,$input_len,$output_len,$bs,$TTFT_mean,$TPOT_mean,$ITL_mean,$output_throughput,$total_throughput,$duration,$output_throughput_per_bs,$decode_throughput_per_bs,$pkg_version" >> "${result_path}output.csv"
      sleep 10

      # Keep increasing the batch size only while both latencies are within
      # their thresholds; a failed run (empty metrics) also stops the sweep.
      condition=$(within_thresholds "$TTFT_mean" "$TPOT_mean" "$ttft_thre" "$tpot_thre")
      if [[ "$condition" == "1" ]]; then
        bs=$((bs + bs_interval))
      else
        break
      fi
    done
  done
}

#######################################
# Benchmark one config item across all of its tensor-parallel sizes.
# Globals:   HOST, PORT, CSV_HEADER, dst_path (read)
# Arguments: $1 - item JSON (one element of .items, compact form)
# Exits the whole script with status 1 when the server port does not open
# within TIMEOUT seconds.
#######################################
benchmark_item() {
  local item=$1
  local model_name model_path dtype seqlen_len ttft_len tpot_len result_path tp

  model_name=$(jq -r '.model_name' <<<"$item")
  model_path=$(jq -r '.model_path' <<<"$item")
  dtype=$(jq -r '.dtype' <<<"$item")
  seqlen_len=$(jq -r '.seqlen_tuple | length' <<<"$item")
  ttft_len=$(jq -r '.ttft_thres | length' <<<"$item")
  tpot_len=$(jq -r '.tpot_thres | length' <<<"$item")

  # All three lists are indexed together, so they must have the same length.
  if [[ "$seqlen_len" -ne "$ttft_len" || "$seqlen_len" -ne "$tpot_len" ]]; then
    echo "***********************************"
    echo "测试项:模型 ${model_name} "
    echo "输入输出序列、ttft阈值、tpot阈值数量存在不一致,无法测试最大并发量"
    echo "跳过该测试项"
    echo "***********************************"
    return 0
  fi

  result_path=${dst_path}/${model_name}/
  mkdir -p -- "$result_path"

  # Write the CSV header only when the file is missing or empty.
  if [[ ! -s "${result_path}output.csv" ]]; then
    echo "$CSV_HEADER" > "${result_path}output.csv"
  fi

  # The list is read on fd 4 (not through a pipeline) so that `exit` below
  # terminates the script itself and child processes cannot consume the list.
  while IFS= read -r tp <&4; do
    [[ -n "$tp" ]] || continue

    # Launch the server start-up script in the background.
    nohup bash run_apiserver.sh "$model_name" "$model_path" "$tp" "$dtype" \
      "$HOST" "$PORT" "$result_path" 3<&- 4<&- &

    if ! wait_for_port; then
      echo "ERR:PORT ${PORT} launch time out, exit!!!。"
      pkill -f vllm # do not leave the half-started server behind
      exit 1
    fi

    sweep_batch_sizes "$item" "$model_name" "$model_path" "$dtype" "$tp" "$result_path"

    pkill -f vllm
    sleep 60
  done 4< <(jq -c '.tensor_parallel[]' <<<"$item")
}

#######################################
# Entry point: load the JSON config and benchmark every item in it.
# Globals: CONFIG_FILE (read); DCU, vllm_version, pkg_version, dst_path (set)
#######################################
main() {
  if [[ ! -r "$CONFIG_FILE" ]]; then
    echo "ERR: cannot read ${CONFIG_FILE}" >&2
    exit 1
  fi

  # Read the JSON configuration file.
  DCU=$(jq -r '.DCU' "$CONFIG_FILE")
  # vllm_version is not referenced in the surviving text; kept in case the
  # lost span used it.
  vllm_version=$(jq -r '.vllm_version' "$CONFIG_FILE")
  pkg_version=$(jq -r '.pkg_version' "$CONFIG_FILE")
  dst_path=$(jq -r '.dst_path' "$CONFIG_FILE")

  local items item
  items=$(jq -c '.items[]' "$CONFIG_FILE")

  # Items are read on fd 3 so that stdin stays untouched for child processes.
  while IFS= read -r item <&3; do
    [[ -n "$item" ]] || continue
    benchmark_item "$item"
  done 3<<<"$items"
}

main "$@"