# extract.py

import argparse
import re

# Command-line interface: an input path, one or more column-name patterns,
# and the aggregation to apply to each selected column.
parser = argparse.ArgumentParser(
    description="Aggregate extrema for selected columns in HPL tables"
)
parser.add_argument(
    "-i", "--input", required=True, help="Input file path, e.g. log/result.txt"
)
parser.add_argument(
    "-t",
    "--target",
    action="append",
    required=True,
    help="Column name to aggregate; repeatable, supports regex, e.g. -t 'GFLOPS' -t 'pdfact'",
)
parser.add_argument(
    "--op",
    choices=("max", "min"),
    default="max",
    help="Aggregation: max or min (default: max)",
)
args = parser.parse_args()

# Merge all targets into one alternation. Each target is wrapped in a
# non-capturing group so a "|" inside one target cannot leak into its
# neighbours.
try:
    pattern = re.compile("|".join(f"(?:{t})" for t in args.target))
except re.error as exc:
    # The targets come straight from the command line, so a malformed regex
    # is a usage error: report it through argparse (usage text, exit status 2)
    # instead of letting a traceback escape.
    parser.error(f"invalid --target pattern: {exc}")

# Built-in reducer matching --op; it is applied pairwise to
# (new value, running best) while rows are read.
agg_op = max if args.op == "max" else min

# State for the table currently being read.
cur_best = {}  # column name -> running extreme value of the open table
cur_cols = None  # column name -> index into the "|"-split row; None when no table is open
# One {column name: extreme value} dict per finished table that yielded data.
tables = []


def flush():
    """Append a snapshot of the current table's extrema to ``tables``.

    Does nothing when ``cur_best`` is empty, so tables that produced no
    numeric values are never recorded. A copy is stored so later changes to
    ``cur_best`` cannot alter the recorded result.
    """
    if not cur_best:
        return
    tables.append(dict(cur_best))


# Single pass over the input. A line that contains "|" and matches the target
# pattern opens a new table; the lines after it feed that table's running
# extrema until a blank line or a "-"/"|" rule closes it.
with open(args.input) as log_file:
    for raw in log_file:
        # Header row: remember the split index of every non-empty cell whose
        # name matches the pattern.
        if "|" in raw and pattern.search(raw):
            flush()  # store whatever the previous table accumulated
            cur_cols = {}
            for pos, cell in enumerate(raw.split("|")):
                title = cell.strip()
                if title and pattern.search(title):
                    cur_cols[title] = pos
            cur_best = {}
            continue

        # No table is open (or the header had no matching cells): skip.
        if not cur_cols:
            continue

        # A blank line, or a rule made of both "-" and "|", ends the table.
        # A rule of dashes alone does not satisfy the set comparison, so it
        # falls through and is ignored by the numeric parse below.
        text = raw.strip()
        if text == "" or set(text) == {"-", "|"}:
            flush()
            cur_cols = None
            cur_best = {}
            continue

        # Data row: fold each tracked cell into the running extreme.
        cells = raw.split("|")
        for title, pos in cur_cols.items():
            try:
                number = float(cells[pos])
            except (IndexError, ValueError):
                # Missing or non-numeric cell: leave this column untouched.
                continue
            cur_best[title] = agg_op(number, cur_best.get(title, number))

# The input may end while a table is still open.
flush()

# Nothing was aggregated: say so and exit with a success status.
if not tables:
    print("No tables detected.")
    raise SystemExit(0)

# Report every column seen in any table, in first-seen order. Taking the
# columns from the first table alone would silently drop a column that only
# appears (or only parses as numeric) in a later table.
actual_cols = list(dict.fromkeys(name for t in tables for name in t))
print(f"op: {args.op}")

# One output row per table, numbered from 1. t.get(name, "") leaves the cell
# blank when that table recorded no value for the column.
headers = ["Table"] + actual_cols
rows = [
    [str(i)] + [str(t.get(name, "")) for name in actual_cols]
    for i, t in enumerate(tables, 1)
]

# Transpose header + rows with zip(*) so each column's width is the length of
# its longest cell.
all_data = [headers] + rows
widths = [max(len(cell) for cell in col) for col in zip(*all_data)]


def fmt(row):
    """Return *row* as one line: cells left-justified to ``widths``, joined by " | "."""
    padded = [cell.ljust(width) for cell, width in zip(row, widths)]
    return " | ".join(padded)


# Emit the report: header row, a dashed rule sized to the column widths, then
# one line per table.
rule = "-+-".join("-" * width for width in widths)
for text_line in [fmt(headers), rule] + [fmt(r) for r in rows]:
    print(text_line)