# plot.py

import argparse, pandas as pd, matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt, re, os

def find_avg_row(df):
    """Locate the row that holds the averaged results.

    Rows are scanned top to bottom; the first row containing a string cell
    that matches ``平均值`` or ``Average`` wins.

    Args:
        df: DataFrame to scan.

    Returns:
        The index label of the first matching row, or the last index label
        of ``df`` when no row matches.
    """
    marker = re.compile(r'平均值|Average')
    for label, record in df.iterrows():
        for value in record:
            # Only text cells can carry the marker; numbers and NaN are skipped.
            if isinstance(value, str) and marker.search(value):
                return label
    return df.index[-1]

def _numeric_tail(row, count=4):
    """Return the numeric values among the last ``count`` non-null cells of ``row``."""
    numbers = []
    for cell in row.dropna().iloc[-count:]:
        # float(True) == 1.0, but a boolean cell is never a measurement.
        if pd.api.types.is_bool(cell):
            continue
        try:
            number = float(cell)
        except (TypeError, ValueError):
            # Text, timestamps and malformed numerals such as "1.2.3".
            continue
        # Reject NaN (the only value unequal to itself) and infinities,
        # which float() accepts from text such as "nan" or "inf".
        if number != number or number in (float("inf"), float("-inf")):
            continue
        numbers.append(number)
    return numbers


def extract_values(df):
    """Extract the four throughput percentages from a sheet.

    The last four non-null cells of the row found by ``find_avg_row`` are
    parsed as numbers. If that does not yield exactly four numbers, the
    last row of ``df`` is tried instead. Parsing uses ``float()``, so
    negative values and scientific notation are accepted.

    Args:
        df: DataFrame holding one sheet.

    Returns:
        A dict with four float values. Reading the four cells left to right
        they map to ``generate_throughput_without_ttft``,
        ``singel_road_generate_throughput``, ``generate_throughput`` and
        ``Total_throughput``.

    Raises:
        ValueError: If neither row yields exactly four numeric values.
    """
    # NOTE(review): the value returned by find_avg_row is used as a position.
    # That equals the label only for a default integer index - confirm for
    # frames built any other way.
    vals = _numeric_tail(df.iloc[find_avg_row(df)])
    if len(vals) != 4:
        vals = _numeric_tail(df.iloc[-1])
    if len(vals) != 4:
        raise ValueError("无法提取4个百分比")
    return {
        "Total_throughput": vals[3],
        "generate_throughput": vals[2],
        # Key spelling is kept as is: plot_summary looks up this exact string.
        "singel_road_generate_throughput": vals[1],
        "generate_throughput_without_ttft": vals[0],
    }

def _load_sheet_summaries(file_path):
    """Return ``{sheet name: extracted values}`` for every parsable sheet."""
    summary = {}
    # The context manager releases the workbook handle even if a sheet fails.
    with pd.ExcelFile(file_path) as xls:
        for sheet in xls.sheet_names:
            try:
                df = pd.read_excel(xls, sheet_name=sheet, header=None)
                summary[sheet] = extract_values(df)
            except Exception as exc:
                # Best effort: one bad sheet must not abort the whole chart,
                # but say which sheet was dropped and why.
                print(f"⚠️ 跳过工作表 {sheet}：{exc}")
    return summary


def _label_bars(bars, vals):
    """Draw a dashed leader line and a boxed percentage label above each bar."""
    # Same vertical gap for the whole series: 5% of its largest value.
    gap = max(vals) * 0.05
    for bar, v in zip(bars, vals):
        top = bar.get_height()
        center = bar.get_x() + bar.get_width() / 2
        label_y = top + gap

        # Leader line from the bar top up to the label.
        plt.plot([center, center], [top, label_y],
                 color='gray', linestyle='--', linewidth=0.8, alpha=0.7)

        # Percentage text on a white rounded box.
        plt.text(center, label_y, f"{v:.1f}%",
                 ha='center', va='bottom',
                 fontsize=6,
                 rotation=60,
                 bbox=dict(facecolor='white', edgecolor='gray', boxstyle='round,pad=0.2'))


def plot_summary(file_path, output_prefix):
    """Plot a grouped bar chart of the per-sheet values and save it.

    Each sheet of the workbook becomes one group on the x axis with four
    bars, one per metric. The chart is written to ``<output_prefix>.png``
    (300 dpi) and ``<output_prefix>.pdf``. Sheets whose values cannot be
    extracted are reported and skipped; if none remain, a message is
    printed and nothing is saved.

    Args:
        file_path: Path of the Excel workbook to read.
        output_prefix: Output path without extension. A missing parent
            directory is created.

    Returns:
        None.
    """
    summary = _load_sheet_summaries(file_path)
    if not summary:
        print("❌ 无数据可绘图")
        return

    labels = ["Total_throughput", "generate_throughput",
              "singel_road_generate_throughput", "generate_throughput_without_ttft"]
    colors = ['#2E5984', '#FF8C42', '#4CB944', '#8C2F39']
    models = list(summary.keys())
    x = range(len(models))
    bar_width = 0.18

    fig = plt.figure(figsize=(max(len(models)*1.2, 12), 8))
    try:
        # One bar series per metric, shifted so the four bars of a group
        # are centred on the group's tick.
        for i, (lab, color) in enumerate(zip(labels, colors)):
            vals = [summary[m][lab] for m in models]
            offset = (i - 1.5) * bar_width
            bars = plt.bar([p + offset for p in x], vals,
                           width=bar_width, label=lab, color=color)
            _label_bars(bars, vals)

        plt.xticks(x, models, rotation=30, ha='right', fontsize=9)
        plt.ylabel("Percentage", fontsize=10)
        plt.title("Throughput Comparison", fontsize=12, pad=20)

        # Legend sits outside the axes, to the right.
        plt.legend(
            bbox_to_anchor=(1.02, 1),
            loc='upper left',
            fontsize=9,
            frameon=False
        )

        plt.subplots_adjust(right=0.8, bottom=0.15)

        # Saving fails if the target directory is missing, so create it.
        out_dir = os.path.dirname(output_prefix)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

        for ext in ['png', 'pdf']:
            plt.savefig(f"{output_prefix}.{ext}", dpi=300 if ext == 'png' else None, bbox_inches='tight')
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
    print(f"✅ 已保存：{output_prefix}.png / .pdf")

if __name__ == "__main__":
    # Command-line entry point: one positional workbook path and an
    # optional output prefix (-o/--output, default "summary_chart").
    cli = argparse.ArgumentParser()
    cli.add_argument("xlsx_path")
    cli.add_argument("-o", "--output", default="summary_chart")
    options = cli.parse_args()
    plot_summary(file_path=options.xlsx_path, output_prefix=options.output)