panduan.py 5.72 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

def process_data(filename):
    """Parse a comma-separated measurement file and collapse sample pairs.

    Each usable line carries at least five comma-separated fields in the
    order ``operation, algo, proto, nbytes, timeUsec``.  Lines with fewer
    fields, or whose size/time fields are not numeric, are ignored.

    Samples are grouped by ``(operation, algo, proto)`` in file order.  Within
    a group, samples are taken two at a time (1st+2nd, 3rd+4th, ...); a pair
    is kept only when both samples report the same size, and the smaller of
    the two values is recorded.  A trailing unpaired sample is dropped.

    Args:
        filename: path of the text file to read.

    Returns:
        A tuple ``(arrays, sizes_dict)`` of two dicts keyed by
        ``(operation, algo, proto)``: the first maps to the list of per-pair
        minimum values, the second to the matching list of sizes.
    """
    samples = {}

    with open(filename, 'r') as handle:
        for raw in handle:
            fields = [field.strip() for field in raw.split(',')]
            # Blank lines yield a single empty field and are rejected here too.
            if len(fields) < 5:
                continue
            try:
                nbytes = int(fields[3])
                elapsed = float(fields[4])
            except (ValueError, IndexError):
                # Non-numeric size/time (e.g. a header row): skip the line.
                continue
            group = (fields[0], fields[1], fields[2])
            samples.setdefault(group, []).append((nbytes, elapsed))

    arrays = {}
    sizes_dict = {}
    for group, points in samples.items():
        # Even-indexed samples are paired with the odd-indexed one that
        # follows; zip() stops before an unpaired trailing sample.
        kept = [
            (first_size, min(first_val, second_val))
            for (first_size, first_val), (second_size, second_val)
            in zip(points[0::2], points[1::2])
            if first_size == second_size
        ]
        arrays[group] = [value for _, value in kept]
        sizes_dict[group] = [size for size, _ in kept]

    return arrays, sizes_dict


def analyze_results(arrays, sizes_dict, threshold=0.035):
    """Report points where a non-baseline combination beats the baseline.

    For every operation found in ``arrays`` a baseline series is chosen:
    ``(operation, "Default", "Default")`` when present, otherwise the first
    key of that operation whose algo is ``"Default"``, otherwise the key with
    the most samples (an informational message is printed for either
    fallback).  All other keys of the operation are candidates.

    Series are compared position by position over the shortest series of the
    operation.  A candidate is ignored at a position when size information is
    available for both series and the sizes differ, so values measured for
    different sizes are never compared.  At each position the smallest
    candidate value is compared with the baseline value, and the point is
    reported when the relative improvement exceeds ``threshold``.

    Args:
        arrays: dict mapping ``(operation, algo, proto)`` to a list of values.
        sizes_dict: dict mapping the same keys to the size of each value.
            Missing keys or short lists are tolerated; an unknown baseline
            size is reported as ``None``.
        threshold: minimum relative improvement
            ``(baseline - best) / baseline`` required to report a point.

    Returns:
        A list of tuples ``(operation, index, size, baseline_value,
        best_value, best_key, diff)``, ordered by operation name and then by
        index.
    """
    results = []

    # Operations are discovered from the data itself.
    for operation in sorted({key[0] for key in arrays}):
        keys_for_op = [k for k in arrays if k[0] == operation]

        # Preferred baseline, with two fallbacks when it is absent.
        baseline_key = (operation, "Default", "Default")
        if baseline_key not in arrays:
            cand = next((k for k in keys_for_op if k[1] == "Default"), None)
            if cand is not None:
                baseline_key = cand
                print(f"Info: 对于操作 {operation},未找到 (Default,Default) 基线,使用 {baseline_key} 作为基线(找到 algo=='Default')。")
            else:
                # No algo named "Default": use the series with the most samples.
                baseline_key = max(keys_for_op, key=lambda k: len(arrays[k]))
                print(f"Info: 对于操作 {operation},未找到 (Default,Default) 基线,回退使用 {baseline_key} 作为基线(样本点最多)。")

        # Everything except the baseline is a candidate.
        compare_types = [k for k in keys_for_op if k != baseline_key]
        if not compare_types:
            print(f"Warning: 操作 {operation} 没有可比较的类型(除基线 {baseline_key} 外)。跳过。")
            continue

        # Only positions present in every series of this operation are used.
        min_len = min(len(arrays[k]) for k in keys_for_op)
        if min_len == 0:
            print(f"Warning: 操作 {operation} 基线或比较类型没有有效数据点(min_len=0),跳过。")
            continue

        baseline = arrays[baseline_key]
        base_sizes = sizes_dict.get(baseline_key, [])
        cand_sizes = {t: sizes_dict.get(t, []) for t in compare_types}

        for i in range(min_len):
            size = base_sizes[i] if i < len(base_sizes) else None
            min_type = None
            min_val = float('inf')
            for t in compare_types:
                t_sizes = cand_sizes[t]
                # Skip a candidate whose entry at this position is known to
                # belong to a different size than the baseline entry.
                if size is not None and i < len(t_sizes) and t_sizes[i] != size:
                    continue
                val = arrays[t][i]
                if val < min_val:
                    min_val = val
                    min_type = t
            # A non-positive baseline would make the ratio meaningless.
            if min_type is not None and baseline[i] > 0:
                diff = (baseline[i] - min_val) / baseline[i]
                if diff > threshold:
                    results.append((operation, i, size, baseline[i], min_val, min_type, diff))

    return results


def save_results(results, output_file="qz.txt"):
    """Append the results whose size is non-zero to ``output_file``.

    Args:
        results: iterable of 7-tuples ``(operation, index, size, baseline,
            best_value, best_key, diff)``; ``best_key`` is indexed at
            positions 1 and 2 for the printed label.
        output_file: path of the report file.  It is opened in append mode,
            so existing content is kept.
    """
    with open(output_file, "a") as out:
        # Entries with size 0 are filtered out of the report.
        out.writelines(
            f"Operation: {op_name}, Size={nbytes} bytes, "
            f"baseline={base_val:.2f}, min_other={best_val:.2f} "
            f"({best_key[1]},{best_key[2]}), diff={gain*100:.2f}%\n"
            for op_name, _index, nbytes, base_val, best_val, best_key, gain in results
            if nbytes != 0
        )


if __name__ == "__main__":
    # Pipeline: parse "shuju.txt", compare every combination against the
    # baseline, then append the findings to the default report file.
    arrays, sizes = process_data("shuju.txt")
    results = analyze_results(arrays, sizes)
    save_results(results)
    # NOTE: the count includes entries with size 0, which save_results()
    # leaves out of the file.
    print(f"找到 {len(results)} 条有效比较结果,已保存到 qz.txt")

    if not results:
        # Hints for an empty report.  The first hint states 3.5% to match the
        # 0.035 threshold used by analyze_results().
        print("\n可能原因:")
        print("1. 没有算法组合比Default快3.5%以上")
        print("2. 数据分组不一致导致比较失败")
        print("3. 数据文件中缺少某些操作类型或算法组合的数据")
    else:
        # Count the reported points per operation.
        operation_stats = {}
        for result in results:
            operation = result[0]
            operation_stats[operation] = operation_stats.get(operation, 0) + 1

        print("\n各操作类型有效比较结果统计:")
        for op, count in operation_stats.items():
            print(f"  {op}: {count} 条")