#!/bin/bash
# run_performance_test.sh - Run a performance benchmark against a vLLM server
# and save the detailed responses.
#
# Steps:
#   1. Verify the dataset exists and the server answers ${BASE_URL}/health.
#   2. Convert the dataset JSONL into a JSONL prompts file.
#   3. Run `vllm bench serve` with that prompts file and save the results.
#
# All paths are relative to the current working directory.
# WARNING: everything matched by ./results/* is deleted on every run (after
# the precondition checks have passed).
#
# Requires: curl, python, vllm on PATH.

set -euo pipefail

# ========== Configuration ==========
readonly BASE_URL="http://0.0.0.0:8000"
readonly MODEL_NAME="qwen3-8B"
readonly LOCAL_MODEL_PATH="/data2/models/qwen3-8B"
readonly DATASET_JSONL="./MATH-500/test.jsonl"
readonly RESULTS_ROOT="./results"
readonly PROMPTS_FILE="${RESULTS_ROOT}/math500_prompts.jsonl"
readonly RESULT_DIR="${RESULTS_ROOT}/performance_results"
readonly RESULT_FILENAME="qwen3_8b_math500_perf.json"
readonly REQUEST_RATE=10
readonly NUM_PROMPTS=500
readonly CUSTOM_OUTPUT_LEN=32768
readonly MAX_CONCURRENCY=256
readonly TEMPERATURE=0.0
# Upper bound (seconds) for the health probe so a dead host cannot hang the run.
readonly HEALTH_TIMEOUT_SECS=10
# ===================================

# 0. Check local inputs before touching anything on disk.
if [[ ! -f "$DATASET_JSONL" ]]; then
  echo ">>> 错误:找不到数据集文件 ${DATASET_JSONL}" >&2
  exit 1
fi

# 1. Check the service.
# -f makes curl return non-zero on HTTP error statuses (>= 400); without it an
# unhealthy server that still answers would be reported as healthy.
echo ">>> 检查 vLLM 服务状态..."
if ! curl -sf --max-time "$HEALTH_TIMEOUT_SECS" "${BASE_URL}/health" > /dev/null; then
  echo ">>> 错误:无法连接到 ${BASE_URL}" >&2
  exit 1
fi
echo ">>> 服务正常。"

# Start from an empty results directory. This runs only after the checks above
# so a failed precondition does not destroy the previous run's results.
# ${RESULTS_ROOT:?} aborts instead of expanding to "/*" if the variable is empty.
mkdir -p -- "$RESULTS_ROOT"
rm -rf -- "${RESULTS_ROOT:?}"/*

# 2. Generate the JSONL prompts file.
# The results directory was just emptied and the writer opens the output in
# 'w' mode, so no separate "delete the old prompts file" step is needed.
# Each output record holds the prompt text and the source item's unique_id.
# A per-record "sampling_params": {"max_tokens": 8192} entry is currently
# disabled and is NOT written.
echo ">>> 正在从 $DATASET_JSONL 生成 JSONL 格式提示词文件..."
python - "$DATASET_JSONL" "$PROMPTS_FILE" << 'EOF'
import sys, json

with open(sys.argv[1], 'r', encoding='utf-8') as f_in, \
     open(sys.argv[2], 'w', encoding='utf-8') as f_out:
    for line in f_in:
        # Skip blank lines (e.g. a trailing empty line) instead of crashing
        # in json.loads.
        if not line.strip():
            continue
        item = json.loads(line)
        prompt_text = f"{item['problem']}\n\nPlease reason step by step, and put your final answer within \\boxed{{}}."
        record = {
            "prompt": prompt_text,
            "metadata": {"unique_id": item["unique_id"]}
        }
        f_out.write(json.dumps(record, ensure_ascii=False) + '\n')
EOF
echo ">>> JSONL 提示词文件已保存至 $PROMPTS_FILE"

# 3. Run the benchmark and save detailed results.
# Arguments are collected in an array so each value stays a single word and
# individual flags can be annotated.
bench_args=(
  --backend vllm
  --base-url "$BASE_URL"
  --model "$MODEL_NAME"
  --tokenizer "$LOCAL_MODEL_PATH"
  --dataset-name custom
  --dataset-path "$PROMPTS_FILE"
  --num-prompts "$NUM_PROMPTS"
  --request-rate "$REQUEST_RATE"
  --save-result
  --save-detailed
  --result-dir "$RESULT_DIR"
  --result-filename "$RESULT_FILENAME"
  --custom-output-len "$CUSTOM_OUTPUT_LEN"
  # NOTE(review): --metadata is passed without any values, exactly as before;
  # confirm whether KEY=VALUE entries were intended here.
  --metadata
  --max-concurrency "$MAX_CONCURRENCY"
  --temperature "$TEMPERATURE"
  # Currently disabled: --top_p 0.6
)

echo ">>> 开始性能压测并保存详细结果..."
vllm bench serve "${bench_args[@]}"

echo ">>> 性能测试完成!结果目录: $RESULT_DIR"
echo ">>> 汇总性能指标: $RESULT_DIR/$RESULT_FILENAME"