division.py 6.13 KB
Newer Older
jerrrrry's avatar
jerrrrry committed
1
2
#!/usr/bin/env python3
"""
jerrrrry's avatar
jerrrrry committed
3
4
5
6
universal_ratio_full.py
一键批量对比成对 Excel(含首 token 延迟)
python universal_ratio_full.py folder1 folder2 [folder3 ...]
输出:summary_ratio.xlsx(每张 Sheet 含 5 列百分比)
jerrrrry's avatar
jerrrrry committed
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
"""

import argparse
import os
import re
import pandas as pd
from openpyxl import Workbook
from openpyxl.styles import Alignment, Border, Side, Font, PatternFill
from openpyxl.utils import get_column_letter
DEFAULT_FONT = Font(name='Arial', size=10)
# ------------------ 样式 ------------------
thin = Side('thin')
border = Border(top=thin, bottom=thin, left=thin, right=thin)
center = Alignment(horizontal='center', vertical='center', wrap_text=True)

header_font = Font(bold=True, color='FFFFFF')
jerrrrry's avatar
jerrrrry committed
23
black_font = Font(name='微软雅黑', size=11, bold=True, color='000000')  # 000000代表黑色
jerrrrry's avatar
jerrrrry committed
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
blue_fill   = PatternFill('solid', fgColor='e0ffff')   # 纯蓝
orange_fill = PatternFill('solid', fgColor='faf0e6')   # 橙色
green_fill  = PatternFill('solid', fgColor='4de680')   # 绿色
red_fill    = PatternFill('solid', fgColor='FF7f50')   # 红色

# ================= 工具函数 =================
def build_pairs(folders):
    """按前缀(_tpX) 收集成对文件"""
    files_map = {}
    for folder in folders:
        for f in os.listdir(folder):
            if not f.endswith('.xlsx'):
                continue
            match = re.search(r'(.+_tp\d+)', f)
            if not match:
                continue
            prefix = match.group(1)
            files_map.setdefault(prefix, []).append(os.path.join(folder, f))
    return {k: v for k, v in files_map.items() if len(v) == 2}

def process_sheet(wb, prefix, path_A, path_B):
    df_A = pd.read_excel(path_A, header=None)
    df_B = pd.read_excel(path_B, header=None)

    # 对齐行数
    min_rows = min(len(df_A), len(df_B))
    df_A, df_B = df_A.iloc[:min_rows], df_B.iloc[:min_rows]

jerrrrry's avatar
jerrrrry committed
52
53
54
55
56
57
58
59
60
    # 列索引(总、生成、首token、单路、不带首)
    pct_cols = [5, 6, 7, 10, 11]
    titles   = [
        '总吞吐量(%)',
        '生成吞吐量(%)',
        '首token延迟(%)',
        '单路生成吞吐(%)',
        '不带首字生成吞吐(%)'
    ]
jerrrrry's avatar
jerrrrry committed
61
62
63

    pct_rows, pct_df = [], pd.DataFrame()
    for r in range(2, len(df_A)):
jerrrrry's avatar
jerrrrry committed
64
65
66
67
68
69
70
71
72
73
        a_vals = [df_A.iloc[r, c] for c in pct_cols]
        b_vals = [df_B.iloc[r, c] for c in pct_cols]
        pct = []
        for i, (a, b) in enumerate(zip(a_vals, b_vals)):
            if i == 2:  # 首 token 延迟:倒序
                pct.append(round(b / a * 100, 2) if a else None)
            else:
                pct.append(round(a / b * 100, 2) if b else None)
        pct_rows.append(pct)

jerrrrry's avatar
jerrrrry committed
74
75
    pct_df = pd.DataFrame(pct_rows, columns=titles)
    avg = pct_df.mean().round(2).tolist()
jerrrrry's avatar
jerrrrry committed
76
    max_row_ws = [idx + 3 for idx in pct_df.idxmax()]
jerrrrry's avatar
jerrrrry committed
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
    min_row_ws = [idx + 3 for idx in pct_df.idxmin()]

    ws = wb.create_sheet(title=prefix)
    rows_A, rows_B = df_A.values.tolist(), df_B.values.tolist()
    k_cols, l_cols = len(df_A.columns), len(df_B.columns)

    # ---------- 表A ----------
    for r_idx, row in enumerate(rows_A, 1):
        for c_idx, val in enumerate(row, 1):
            ws.cell(row=r_idx, column=c_idx, value=val)
    ws.merge_cells(start_row=1, start_column=1, end_row=1, end_column=k_cols)
    ws.cell(row=1, column=1, value=os.path.basename(path_A)).font = header_font
    ws.cell(row=1, column=1).fill = orange_fill
    ws.cell(row=1, column=1).alignment = center

    # 空白1
    blank1 = k_cols + 1
    for r in range(1, len(rows_A) + 1):
        ws.cell(row=r, column=blank1, value=None)

    # ---------- 表B ----------
    l_start = blank1 + 1
    for r_idx, row in enumerate(rows_B, 1):
        for c_idx, val in enumerate(row, 1):
            ws.cell(row=r_idx, column=l_start + c_idx - 1, value=val)
    ws.merge_cells(start_row=1, start_column=l_start, end_row=1, end_column=l_start + l_cols - 1)
    ws.cell(row=1, column=l_start, value=os.path.basename(path_B)).font = header_font
    ws.cell(row=1, column=l_start).fill = orange_fill
    ws.cell(row=1, column=l_start).alignment = center

    # 空白2
    blank2 = l_start + l_cols
    for r in range(1, len(rows_A) + 1):
        ws.cell(row=r, column=blank2, value=None)

jerrrrry's avatar
jerrrrry committed
112
113
114

    # 百分比区
    pct_start = blank2 + 1  # 数据从 blank2+2 列开始
jerrrrry's avatar
jerrrrry committed
115
    for c_idx, title in enumerate(titles, 0):
jerrrrry's avatar
jerrrrry committed
116
117
118
119
        ws.cell(row=2, column=pct_start + c_idx, value=title).font = header_font
        ws.cell(row=2, column=pct_start + c_idx).fill = blue_fill

    # 写入数据
jerrrrry's avatar
jerrrrry committed
120
121
122
123
    for r_idx, vals in enumerate(pct_rows, 0):
        for c_idx, val in enumerate(vals, 0):
            ws.cell(row=3 + r_idx, column=pct_start + c_idx, value=val)

jerrrrry's avatar
jerrrrry committed
124
    # 平均值行(关键修改:标签列向左移动)
jerrrrry's avatar
jerrrrry committed
125
    avg_row = 3 + len(pct_rows) + 1
jerrrrry's avatar
jerrrrry committed
126
    ws.cell(row=avg_row, column=pct_start - 1, value='平均值').font = black_font  # 改为-2
jerrrrry's avatar
jerrrrry committed
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
    for c_idx, val in enumerate(avg, 0):
        ws.cell(row=avg_row, column=pct_start + c_idx, value=val)

    # 高亮最大最小
    for c_idx, col_name in enumerate(titles, 0):
        max_cell = ws.cell(row=max_row_ws[c_idx], column=pct_start + c_idx)
        min_cell = ws.cell(row=min_row_ws[c_idx], column=pct_start + c_idx)
        max_cell.fill = green_fill
        min_cell.fill = red_fill

    # 通用美化
    for row in ws.iter_rows():
        for cell in row:
            cell.alignment = center
            cell.border = border
            cell.font = DEFAULT_FONT
    for col in ws.columns:
        max_len = max(len(str(cell.value or '')) for cell in col) + 2
        ws.column_dimensions[get_column_letter(col[0].column)].width = max_len

def main():
jerrrrry's avatar
jerrrrry committed
148
    parser = argparse.ArgumentParser(description='批量生成比例对比表(含首 token 延迟)')
jerrrrry's avatar
jerrrrry committed
149
150
151
152
153
154
155
156
157
    parser.add_argument('folders', nargs='+', help='两个或多个文件夹路径')
    args = parser.parse_args()

    pairs = build_pairs(args.folders)
    if not pairs:
        print('未找到可配对文件')
        return

    wb = Workbook()
jerrrrry's avatar
jerrrrry committed
158
    wb.remove(wb.active)
jerrrrry's avatar
jerrrrry committed
159
160
161
162
163
164
165
166
167
    for prefix, paths in pairs.items():
        process_sheet(wb, prefix, *paths)

    output_name = os.getenv('OUTPUT') or 'summary_ratio.xlsx'
    wb.save(output_name)
    print('全部完成✅')

if __name__ == '__main__':
    main()