"megatron/vscode:/vscode.git/clone" did not exist on "ed0c8714efcbbbba242f71876aa8fac39c2b6985"
evaluate_rd_tablebench.py 2.14 KB
Newer Older
jerrrrry's avatar
jerrrrry committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import argparse
import json
import os
import sys

import numpy as np

from .evaluate_mmmu import get_input_output_paths

# The rd-tablebench repo provides the functions used below for grading table
# predictions. Resolve the absolute path of a sibling rd-tablebench checkout
rd_tablebench_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "rd-tablebench"))
# and put it on sys.path so its top-level "grading" and "convert" modules import.
sys.path.insert(0, rd_tablebench_dir)

from grading import table_similarity
from convert import html_to_numpy
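# html_to_numpy parses an HTML <table> string into an array of cell values and
# table_similarity scores how closely two such arrays match; the exact metric
# is defined in the rd-tablebench repo rather than here.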


def convert_to_rdtablebench_format(input_path):
    """Convert input files to RDTableBench compatible format."""
    input_file_paths, output_file_path = get_input_output_paths(input_path, "RD_TableBench")

    output = []

    for input_file_path in input_file_paths:
        with open(input_file_path, "r") as input_file:
            for line in input_file:
                res = json.loads(line)
                output.append(res)

    output = sorted(output, key=lambda x: x["sample_id"])

    with open(output_file_path, "w") as output_file:
        json.dump(output, output_file)

    return output_file_path
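
# Each merged record must carry at least "sample_id", "predict", and
# "ground_truth" (HTML table strings). An illustrative raw input .jsonl line,
# with placeholder values:
#   {"sample_id": 0, "predict": "<table>...</table>", "ground_truth": "<table>...</table>"}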


def rdtablebench_eval(input_path):
    """Run RD-TableBench evaluation."""
    result_file = convert_to_rdtablebench_format(input_path)

    with open(result_file) as f:
        data = json.load(f)

    similarities = []
    num_failed = 0
    for sample in data:
        pred = sample["predict"]
        target = sample["ground_truth"]
        target_np = html_to_numpy(target)
        try:
            pred_np = html_to_numpy(pred)
            similarity = table_similarity(target_np, pred_np)
        except Exception as e:
            # Score unparsable predictions as 0 instead of aborting the run.
            print(f"Failed to grade table: {e}")
            similarity = 0
            num_failed += 1
        similarities.append(similarity)

    print(f"Accuracy: {np.mean(similarities)}")
    print(f"Failed: {num_failed}")

def main():
    """Run RD-TableBench evaluation."""

    parser = argparse.ArgumentParser()
    parser.add_argument("--input-path", type=str, required=True, help="Path to input file(s)")
    args = parser.parse_args()

    rdtablebench_eval(args.input_path)


if __name__ == "__main__":
    main()
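

# Example invocation (the package path is a placeholder; the relative import of
# evaluate_mmmu above means this file must be run as a module from within its
# package rather than as a standalone script):
#   python -m <package>.evaluate_rd_tablebench --input-path /path/to/results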