run_benchmarks.sh 2.29 KB
Newer Older
sunzhq2's avatar
init  
sunzhq2 committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/bin/bash
# run_performance_test.sh - Run a serving performance benchmark and save the
# detailed responses.
#
# NOTE(review): this header names run_performance_test.sh; confirm it matches
# the actual file name.
# NOTE: every path used by this script is relative to the current working
# directory, and the existing contents of ./results are deleted on each run.

# Strict mode: abort on a failing command (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# -p makes directory creation idempotent, so no separate existence test is
# needed.
mkdir -p ./results

# Start from a clean results directory. The glob does not match dotfiles, so
# hidden entries are left in place.
rm -rf ./results/*
# ========== Configuration ==========
# Each setting can be overridden from the environment, for example:
#   REQUEST_RATE=20 MODEL_NAME=my-model bash <this script>
# The value after ":-" is the default used when the variable is unset or empty.

# Root URL of the server; "/health" is probed on it and it is passed to the
# benchmark as --base-url.
BASE_URL="${BASE_URL:-http://0.0.0.0:8000}"
# Passed to the benchmark as --model.
MODEL_NAME="${MODEL_NAME:-qwen3-8B}"
# Passed to the benchmark as --tokenizer.
LOCAL_MODEL_PATH="${LOCAL_MODEL_PATH:-/data2/models/qwen3-8B}"
# Input dataset, one JSON object per line.
DATASET_JSONL="${DATASET_JSONL:-./MATH-500/test.jsonl}"
# Generated prompt file, passed to the benchmark as --dataset-path.
PROMPTS_FILE="${PROMPTS_FILE:-./results/math500_prompts.jsonl}"
# Passed to the benchmark as --result-dir.
RESULT_DIR="${RESULT_DIR:-./results/performance_results}"
# Passed to the benchmark as --result-filename.
RESULT_FILENAME="${RESULT_FILENAME:-qwen3_8b_math500_perf.json}"
# Passed to the benchmark as --request-rate.
REQUEST_RATE="${REQUEST_RATE:-10}"
# ===================================

# 1. Check that the service is up before doing any work.
echo ">>> 检查 vLLM 服务状态..."
# -f makes curl exit non-zero on an HTTP error status (>= 400); without it any
# HTTP response, including an error page, would count as healthy.
# The timeouts keep an unresponsive host from hanging the script indefinitely.
if ! curl -sf --connect-timeout 5 --max-time 10 "${BASE_URL}/health" > /dev/null; then
    # Diagnostics go to stderr so they are not mixed into captured stdout.
    echo ">>> 错误:无法连接到 ${BASE_URL}" >&2
    exit 1
fi
echo ">>> 服务正常。"

# 2. Generate the JSONL prompt file. Each output record holds a "prompt" and a
#    "metadata" object; no "sampling_params" field is written (see the note
#    after the heredoc).

# Fail early with a clear message instead of a Python traceback.
if [[ ! -f "$DATASET_JSONL" ]]; then
    echo ">>> 错误:找不到数据集文件 $DATASET_JSONL" >&2
    exit 1
fi

# Make sure the output directory exists even if PROMPTS_FILE points somewhere
# other than ./results.
mkdir -p -- "$(dirname -- "$PROMPTS_FILE")"

if [ -f "$PROMPTS_FILE" ]; then
    echo ">>> 旧的提示词文件已存在,删除并重新生成。"
    rm -f "$PROMPTS_FILE"
fi

echo ">>> 正在从 $DATASET_JSONL 生成 JSONL 格式提示词文件..."
# The heredoc delimiter is quoted, so the shell performs no expansion inside
# the Python source; the two paths are handed over as argv[1] and argv[2].
python - "$DATASET_JSONL" "$PROMPTS_FILE" << 'EOF'
import sys, json

# argv[1]: input dataset (JSONL); argv[2]: output prompt file (JSONL).
with open(sys.argv[1], 'r', encoding='utf-8') as f_in, \
     open(sys.argv[2], 'w', encoding='utf-8') as f_out:
    for line in f_in:
        # Tolerate blank lines rather than crashing inside json.loads.
        if not line.strip():
            continue
        item = json.loads(line)
        # Append the answer-format instruction to the problem statement.
        prompt_text = f"{item['problem']}\n\nPlease reason step by step, and put your final answer within \\boxed{{}}."
        record = {
            "prompt": prompt_text,
            "metadata": {"unique_id": item["unique_id"]}
        }
        # ensure_ascii=False keeps non-ASCII characters readable in the output.
        f_out.write(json.dumps(record, ensure_ascii=False) + '\n')
EOF
# Disabled: an optional field that could be added to `record` above:
#   "sampling_params": {"max_tokens": 8192},
# NOTE(review): whether the benchmark honours this field cannot be told from
# this script; confirm before re-enabling.
echo ">>> JSONL 提示词文件已保存至 $PROMPTS_FILE"

# 3. Run the performance benchmark and keep the detailed results.
printf '%s\n' ">>> 开始性能压测并保存详细结果..."

# The options are collected in an array so that each one sits on its own line
# without backslash continuations; quoting of the values is preserved when the
# array is expanded.
bench_args=(
    --backend vllm
    --base-url "$BASE_URL"
    --model "$MODEL_NAME"
    --tokenizer "$LOCAL_MODEL_PATH"
    --dataset-name custom
    --dataset-path "$PROMPTS_FILE"
    --num-prompts 500
    --request-rate "$REQUEST_RATE"
    --save-result
    --save-detailed
    --result-dir "$RESULT_DIR"
    --result-filename "$RESULT_FILENAME"
    --custom-output-len 32768
    # --metadata is given without any values here.
    --metadata
    --max-concurrency 256
    --temperature 0.0
    # --top_p 0.6
)
vllm bench serve "${bench_args[@]}"

printf '%s\n' ">>> 性能测试完成!结果目录: $RESULT_DIR"
printf '%s\n' ">>> 汇总性能指标: $RESULT_DIR/$RESULT_FILENAME"