#!/bin/bash

# Prepare the output directory tree for benchmark results and logs.
mkdir -p \
    /workspace/test/inference_outputs/results \
    /workspace/test/inference_outputs/logs/server \
    /workspace/test/inference_outputs/logs/models

# First port handed to a model server; incremented once per model.
BASE_PORT=8001

#######################################
# Classify the startup state of a vLLM server from its log and PID.
# Arguments:
#   $1 - path to the server log file
#   $2 - PID of the background server process
# Outputs:
#   status/diagnostic messages to stdout
# Returns:
#   0 - success banner found, server is up
#   1 - fatal error pattern in log, or process died
#   2 - inconclusive, caller should keep polling
#######################################
check_server_status() {
    local log_file=$1
    local server_pid=$2
    local success_msg="Starting vLLM API server"
    local error_patterns=("RuntimeError" "ValueError" "segmentation fault" "core dumped")
    local pattern

    # The API-server banner means the server is ready to accept requests.
    if grep -q "$success_msg" "$log_file"; then
        echo "✅ Server started successfully!"
        return 0
    fi

    # Scan for known fatal error signatures and show surrounding context.
    for pattern in "${error_patterns[@]}"; do
        if grep -i -q "$pattern" "$log_file"; then
            echo -e "\n❌ [$(date '+%Y-%m-%d %H:%M:%S')] Error detected in logs (pattern: $pattern)!"
            echo "===== ERROR CONTEXT ====="
            grep -i -A 5 -B 5 "$pattern" "$log_file" | tail -n 20
            echo "========================="
            return 1
        fi
    done

    # Process vanished without leaving a recognizable log message.
    if ! kill -0 "$server_pid" 2>/dev/null; then
        echo -e "\n❌ [$(date '+%Y-%m-%d %H:%M:%S')] Server process died unexpectedly!"
        echo "===== LAST LOG LINES ====="
        tail -n 20 "$log_file"
        echo "========================="
        return 1
    fi

    # No verdict yet: keep waiting.
    return 2
}

#######################################
# Stop a launched server and any stray "vllm serve" processes.
# Arguments:
#   $1 - PID of the background server process
#######################################
cleanup_server() {
    kill "$1" 2>/dev/null
    pkill -f "vllm serve" 2>/dev/null
    wait "$1" 2>/dev/null  # reap the background job so no zombie is left
}

# Read the model config: one model per line, ';'-separated fields,
# '#'-comment lines and blank lines skipped.
while IFS=';' read -r model_name model_path tp data_type batch_list prompt_pairs max_model_len gpu_mem_util; do
    # Trim surrounding whitespace (and stray quotes/parentheses) from each field.
    model_name=$(echo "$model_name" | xargs)
    model_path=$(echo "$model_path" | xargs)
    tp=$(echo "$tp" | xargs)
    data_type=$(echo "$data_type" | xargs)
    batch_list=$(echo "$batch_list" | tr -d '"' | xargs)
    prompt_pairs=$(echo "$prompt_pairs" | tr -d '()"' | xargs)
    max_model_len=$(echo "$max_model_len" | xargs)
    gpu_mem_util=$(echo "$gpu_mem_util" | xargs)

    # Assign a unique port per model.
    port=$((BASE_PORT++))

    server_script="/workspace/test/inference_outputs/server_${model_name}_tp${tp}.sh"
    log_file="/workspace/test/inference_outputs/logs/server/${model_name}_tp${tp}_server.log"

    # Generate the per-model launch script. The unquoted heredoc delimiter
    # expands the config values now, so the generated file is self-contained.
    cat > "$server_script" <<EOF
#!/bin/bash

export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export ALLREDUCE_STREAM_WITH_COMPUTE=1
export NCCL_MIN_NCHANNELS=16
export NCCL_MAX_NCHANNELS=16
export VLLM_PCIE_USE_CUSTOM_ALLREDUCE=1
export VLLM_USE_TRITON_PREFIX_FLASH_ATTN=1

export VLLM_NUMA_BIND=1
export VLLM_RANK0_NUMA=0
export VLLM_RANK1_NUMA=1
export VLLM_RANK2_NUMA=2
export VLLM_RANK3_NUMA=3
export VLLM_RANK4_NUMA=4
export VLLM_RANK5_NUMA=5
export VLLM_RANK6_NUMA=6
export VLLM_RANK7_NUMA=7
vllm serve "$model_path" --trust-remote-code \\
--enable-prefix-caching \\
--dtype $data_type \\
--tensor-parallel-size $tp \\
--max-model-len $max_model_len \\
--port $port \\
--gpu-memory-utilization $gpu_mem_util
EOF

    chmod +x "$server_script"
    echo "Generated server script for ${model_name}_tp${tp} at $server_script"

    # 1. Launch the vLLM server in the background, capturing all output.
    "$server_script" > "$log_file" 2>&1 &
    SERVER_PID=$!

    # 2. Poll until the server starts, fails, or times out.
    echo -e "\n🔍 [$(date '+%Y-%m-%d %H:%M:%S')] Starting monitoring for ${model_name}_tp${tp} (PID: $SERVER_PID)"
    max_wait_seconds=20000
    start_time=$(date +%s)

    while true; do
        sleep 20  # poll every 20 seconds

        check_server_status "$log_file" "$SERVER_PID"
        status=$?

        # Server is up: proceed to the benchmark.
        if [ $status -eq 0 ]; then
            break
        fi

        # Fatal error: clean up and move on to the next model.
        if [ $status -eq 1 ]; then
            cleanup_server "$SERVER_PID"
            echo "🛑 Cleaned up resources after failure"
            continue 2  # next iteration of the outer read loop
        fi

        # Still inconclusive: enforce the overall startup timeout.
        current_time=$(date +%s)
        elapsed=$((current_time - start_time))
        if [ $elapsed -ge $max_wait_seconds ]; then
            echo -e "\n⏰ [$(date '+%Y-%m-%d %H:%M:%S')] Timeout waiting for server to start!"
            cleanup_server "$SERVER_PID"
            echo "🛑 Cleaned up resources after timeout"
            continue 2
        fi

        echo "Waiting... (${elapsed}s elapsed)"
    done

    # 3. Server confirmed up: export the parameters test.sh reads, then run it.
    echo -e "\n🚀 [$(date '+%Y-%m-%d %H:%M:%S')] Running tests for ${model_name}_tp${tp}..."
    export MODEL_NAME="$model_name"
    export MODEL_PATH="$model_path"
    export TP="$tp"
    export DATA_TYPE="$data_type"
    export BATCH_LIST="$batch_list"
    export PROMPT_PAIRS="$prompt_pairs"
    export PORT="$port"

    ./test.sh

    # 4. Tear the server down before the next model.
    cleanup_server "$SERVER_PID"
    echo "✅ [$(date '+%Y-%m-%d %H:%M:%S')] ${model_name}_tp${tp} test completed and cleaned up"

done < <(grep -v '^#' ../configs/model_to_test.cfg | grep -v '^$')
# Final summary banner once every configured model has been exercised.
printf '\n📊 [%s] All tests completed. Results saved to results/\n' "$(date '+%Y-%m-%d %H:%M:%S')"