#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Compare timing measurements of algorithm/protocol combinations to a baseline.

Reads "operation, algo, proto, nbytes, timeUsec" records from shuju.txt,
collapses each consecutive pair of same-size measurements to its minimum,
and reports every data point where a non-baseline combination beats the
baseline by more than 3.5%. Non-zero-size results are appended to qz.txt.
"""


def process_data(filename):
    """Parse a measurement file and return ``(arrays, sizes_dict)``.

    Each input line is expected as: operation, algo, proto, nbytes, timeUsec.
    Blank lines and lines with malformed numeric fields are skipped silently.

    Args:
        filename: path to the comma-separated measurement file.

    Returns:
        arrays: ``{(operation, algo, proto): [min_time, ...]}`` — each value
            is the minimum of two consecutive measurements sharing a size.
        sizes_dict: ``{(operation, algo, proto): [size_bytes, ...]}`` aligned
            index-for-index with the lists in ``arrays``.
    """
    data = {}
    # Explicit encoding so parsing does not depend on the platform default.
    with open(filename, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            parts = line.split(',')
            # Record format: operation, algo, proto, nbytes, timeUsec
            if len(parts) >= 5:
                try:
                    operation = parts[0].strip()
                    algo = parts[1].strip()
                    proto = parts[2].strip()
                    size_bytes = int(parts[3].strip())
                    value = float(parts[4].strip())
                    key = (operation, algo, proto)
                    data.setdefault(key, []).append((size_bytes, value))
                except (ValueError, IndexError):
                    # Malformed numeric field: drop the record, keep going.
                    continue

    # Collapse each consecutive pair of measurements to its minimum while
    # keeping the shared size. An odd trailing measurement is dropped, and
    # a pair whose sizes disagree is skipped entirely.
    arrays = {}
    sizes_dict = {}
    for key, values in data.items():
        compressed_vals = []
        compressed_sizes = []
        for i in range(0, len(values) - 1, 2):
            size1, v1 = values[i]
            size2, v2 = values[i + 1]
            # Only merge the pair when both measurements share a size.
            if size1 == size2:
                compressed_vals.append(min(v1, v2))
                compressed_sizes.append(size1)
        arrays[key] = compressed_vals
        sizes_dict[key] = compressed_sizes
    return arrays, sizes_dict


def analyze_results(arrays, sizes_dict):
    """Find data points where some combination beats the baseline by > 3.5%.

    For every operation found in ``arrays`` a baseline key is chosen:
    ``(operation, "Default", "Default")`` if present, otherwise any key with
    ``algo == "Default"``, otherwise the key with the most data points.
    At each aligned index the fastest non-baseline value is compared to the
    baseline; improvements above the 3.5% threshold are collected.

    Args:
        arrays: mapping as produced by :func:`process_data`.
        sizes_dict: size lists aligned with ``arrays``.

    Returns:
        List of tuples
        ``(operation, index, size, baseline_val, min_val, min_key, diff)``.
    """
    results = []
    # Discover every operation present in the data.
    operations = sorted(set(key[0] for key in arrays.keys()))

    for operation in operations:
        # All keys belonging to this operation.
        keys_for_op = [k for k in arrays.keys() if k[0] == operation]
        if not keys_for_op:
            continue

        # Preferred baseline key; fall back if it is absent.
        baseline_key = (operation, "Default", "Default")
        if baseline_key not in arrays:
            # First fallback: any key whose algo is "Default".
            cand = next((k for k in keys_for_op if k[1] == "Default"), None)
            if cand:
                baseline_key = cand
                print(f"Info: 对于操作 {operation},未找到 (Default,Default) 基线,使用 {baseline_key} 作为基线(找到 algo=='Default')。")
            else:
                # Second fallback: the key with the most data points.
                cand = max(keys_for_op, key=lambda k: len(arrays.get(k, [])))
                baseline_key = cand
                print(f"Info: 对于操作 {operation},未找到 (Default,Default) 基线,回退使用 {baseline_key} 作为基线(样本点最多)。")

        # Everything under this operation except the baseline is comparable.
        compare_types = [k for k in keys_for_op if k != baseline_key]

        # Nothing to compare against: skip this operation.
        if not compare_types:
            print(f"Warning: 操作 {operation} 没有可比较的类型(除基线 {baseline_key} 外)。跳过。")
            continue

        # Align lengths: the shortest list among baseline and compare types.
        try:
            min_len = min(len(arrays.get(baseline_key, [])),
                          *(len(arrays.get(t, [])) for t in compare_types))
        except ValueError:
            # Defensive: unexpected empty-sequence situation — skip.
            print(f"Warning: 操作 {operation} 数据长度对齐失败,跳过。")
            continue

        if min_len == 0:
            print(f"Warning: 操作 {operation} 基线或比较类型没有有效数据点(min_len=0),跳过。")
            continue

        baseline = arrays[baseline_key][:min_len]
        sizes = sizes_dict.get(baseline_key, [])[:min_len]

        # Per-index comparison against the fastest non-baseline value.
        for i in range(min_len):
            min_type = None
            min_val = float('inf')
            for t in compare_types:
                if i < len(arrays.get(t, [])):
                    val = arrays[t][i]
                    if val < min_val:
                        min_val = val
                        min_type = t

            # Only report meaningful improvements (> 3.5%) over a positive
            # baseline value.
            if min_type and baseline[i] > 0:
                diff = (baseline[i] - min_val) / baseline[i]
                if diff > 0.035:
                    results.append((operation, i, sizes[i], baseline[i],
                                    min_val, min_type, diff))

    return results


def save_results(results, output_file="qz.txt"):
    """Append results with a non-zero size to ``output_file``.

    Args:
        results: tuples as produced by :func:`analyze_results`.
        output_file: destination path; opened in append mode so repeated
            runs accumulate output.
    """
    # Append mode so results from multiple runs accumulate; explicit
    # encoding keeps the output platform-independent.
    with open(output_file, "a", encoding="utf-8") as f:
        for operation, idx, size, base, other, t, diff in results:
            if size != 0:  # Filter out Size=0 entries.
                f.write(
                    f"Operation: {operation}, Size={size} bytes, "
                    f"baseline={base:.2f}, min_other={other:.2f} "
                    f"({t[1]},{t[2]}), diff={diff*100:.2f}%\n"
                )


if __name__ == "__main__":
    arrays, sizes = process_data("shuju.txt")
    results = analyze_results(arrays, sizes)
    save_results(results)
    print(f"找到 {len(results)} 条有效比较结果,已保存到 qz.txt")

    if not results:
        print("\n可能原因:")
        print("1. 没有算法组合比Default快3%以上")
        print("2. 数据分组不一致导致比较失败")
        print("3. 数据文件中缺少某些操作类型或算法组合的数据")
    else:
        # Tally results per operation type.
        operation_stats = {}
        for result in results:
            operation = result[0]
            operation_stats[operation] = operation_stats.get(operation, 0) + 1

        print("\n各操作类型有效比较结果统计:")
        for op, count in operation_stats.items():
            print(f"  {op}: {count} 条")